diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -192,6 +192,10 @@
                     SmallVectorImpl<Register> &VRegs,
                     SmallVectorImpl<Register> &LeftoverVRegs);

+  /// Version which handles irregular sub-vector splits.
+  void extractVectorParts(Register Reg, unsigned NumElts,
+                          SmallVectorImpl<Register> &VRegs);
+
   /// Helper function to build a wide generic register \p DstReg of type \p
   /// RegTy from smaller parts. This will produce a G_MERGE_VALUES,
   /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
@@ -205,6 +209,11 @@
                    LLT PartTy, ArrayRef<Register> PartRegs,
                    LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});

+  /// Merge \p PartRegs with different types into \p DstReg.
+  void mergeMixedSubvectors(Register DstReg, ArrayRef<Register> PartRegs);
+
+  void appendVectorElts(SmallVectorImpl<Register> &Elts, Register Reg);
+
   /// Unmerge \p SrcReg into smaller sized values, and append them to \p
   /// Parts. The elements of \p Parts will be the greatest common divisor type
   /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and
@@ -285,26 +294,18 @@
   /// vector bounds.
   Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index);

-  LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
-                                                unsigned TypeIdx, LLT NarrowTy);
-
-  /// Legalize a instruction with a vector type where each operand may have a
-  /// different element type. All type indexes must have the same number of
-  /// elements.
-  LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI,
-                                                 unsigned TypeIdx, LLT NarrowTy);
+  /// Handles most opcodes. Split \p MI into the same instruction on sub-vectors
+  /// or scalars with \p NumElts elements (1 for scalar). Supports uneven splits:
+  /// there can be a leftover sub-vector with fewer than \p NumElts elements, or
+  /// a leftover scalar. To avoid this, use moreElements first and set MI's
+  /// number of elements to a multiple of \p NumElts. Non-vector operands that
+  /// should be used on all sub-instructions without splitting are listed in
+  /// \p NonVecOpIndices.
+  LegalizeResult fewerElementsVectorMultiEltType(
+      GenericMachineInstr &MI, unsigned NumElts,
+      std::initializer_list<unsigned> NonVecOpIndices = {});

-  LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
-                                          LLT NarrowTy);
-
-  LegalizeResult
-  fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
-
-  LegalizeResult
-  fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
-
-  LegalizeResult fewerElementsVectorPhi(MachineInstr &MI,
-                                        unsigned TypeIdx, LLT NarrowTy);
+  LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI,
+                                        unsigned NumElts);

   LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT MoreTy);
@@ -320,22 +321,9 @@
                                        unsigned TypeIdx, LLT NarrowTy);

-  LegalizeResult fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
-                                         LLT NarrowTy);
-
   LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
                                       LLT NarrowTy);

-  /// Legalize an instruction by reducing the operation width, either by
-  /// narrowing the type of the operation or by reducing the number of elements
-  /// of a vector.
-  /// The used strategy (narrow vs. fewerElements) is decided by \p NarrowTy.
-  /// Narrow is used if the scalar type of \p NarrowTy and \p DstTy differ,
-  /// fewerElements is used when the scalar type is the same but the number of
-  /// elements between \p NarrowTy and \p DstTy differ.
-  LegalizeResult reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
-                                      LLT NarrowTy);
-
   LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
                                               LLT NarrowTy);

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -58,7 +58,10 @@
   /// The (vector) operation should be implemented by splitting it into
   /// sub-vectors where the operation is legal. For example a <8 x s64> add
-  /// might be implemented as 4 separate <2 x s64> adds.
+  /// might be implemented as 4 separate <2 x s64> adds. There can be a leftover
+  /// if there are not enough elements for the last sub-vector, e.g. a <7 x s64>
+  /// add will be implemented as 3 separate <2 x s64> adds and one s64 add.
+  /// Leftover types can be avoided by doing MoreElements first.
   FewerElements,

   /// The (vector) operation should be implemented by widening the input
@@ -1050,6 +1053,26 @@
           TypeIdx, LLT::fixed_vector(MinElements, VecTy.getElementType()));
     });
   }
+
+  /// Set the number of elements to the nearest larger multiple of \p NumElts.
+  LegalizeRuleSet &alignNumElementsTo(unsigned TypeIdx, const LLT EltTy,
+                                      unsigned NumElts) {
+    typeIdx(TypeIdx);
+    return actionIf(
+        LegalizeAction::MoreElements,
+        [=](const LegalityQuery &Query) {
+          LLT VecTy = Query.Types[TypeIdx];
+          return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+                 (VecTy.getNumElements() % NumElts != 0);
+        },
+        [=](const LegalityQuery &Query) {
+          LLT VecTy = Query.Types[TypeIdx];
+          unsigned NewSize = alignTo(VecTy.getNumElements(), NumElts);
+          return std::make_pair(
+              TypeIdx, LLT::fixed_vector(NewSize, VecTy.getElementType()));
+        });
+  }
+
   /// Limit the number of elements in EltTy vectors to at most MaxElements.
   LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT EltTy,
                                        unsigned MaxElements) {
@@ -1085,6 +1108,19 @@
         .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
   }

+  /// Express \p EltTy vectors strictly using vectors with \p NumElts elements
+  /// (or scalars when \p NumElts equals 1).
+  /// First pad with undef elements to the nearest larger multiple of \p NumElts.
+  /// Then perform the split with all sub-instructions having the same type.
+  /// Using clampMaxNumElements (non-strict) can result in a leftover instruction
+  /// with a different type (fewer elements than \p NumElts, or a scalar).
+  /// No effect if the type is not a vector.
+  LegalizeRuleSet &clampMaxNumElementsStrict(unsigned TypeIdx, const LLT EltTy,
+                                             unsigned NumElts) {
+    return alignNumElementsTo(TypeIdx, EltTy, NumElts)
+        .clampMaxNumElements(TypeIdx, EltTy, NumElts);
+  }
+
   /// Fallback on the previous implementation. This should only be used while
   /// porting a rule.
   LegalizeRuleSet &fallback() {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -497,6 +497,34 @@
   MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0,
                                           uint32_t NumBits);

+  /// Build and insert
+  /// a, b, ..., x = G_UNMERGE_VALUES \p Op0
+  /// \p Res = G_BUILD_VECTOR a, b, ..., x, undef, ..., undef
+  ///
+  /// Pad \p Op0 with undef elements to match the number of elements in \p Res.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Res and \p Op0 must be generic virtual registers with vector type,
+  ///      the same vector element type, and \p Op0 must have fewer elements
+  ///      than \p Res.
+  ///
+  /// \return a MachineInstrBuilder for the newly created build vector instr.
+  MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res,
+                                                      const SrcOp &Op0);
+
+  /// Build and insert
+  /// a, b, ..., x, y, z = G_UNMERGE_VALUES \p Op0
+  /// \p Res = G_BUILD_VECTOR a, b, ..., x
+  ///
+  /// Delete trailing elements in \p Op0 to match the number of elements in
+  /// \p Res.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Res and \p Op0 must be generic virtual registers with vector type,
+  ///      the same vector element type, and \p Op0 must have more elements
+  ///      than \p Res.
+  ///
+  /// \return a MachineInstrBuilder for the newly created build vector instr.
+  MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res,
+                                                        const SrcOp &Op0);
+
   /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1
   ///
   /// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -323,6 +323,11 @@
 LLVM_READNONE
 LLT getLCMType(LLT OrigTy, LLT TargetTy);

+/// Return the smallest type that covers both \p OrigTy and \p TargetTy and is
+/// a multiple of TargetTy.
+LLVM_READNONE
+LLT getCoverTy(LLT OrigTy, LLT TargetTy);
+
 /// Return a type where the total size is the greatest common divisor of \p
 /// OrigTy and \p TargetTy. This will try to either change the number of vector
 /// elements, or bitwidth of scalars. The intent is the result type can be used
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -256,7 +256,7 @@
   LLT PartLLT = MRI.getType(SrcRegs[0]);

   // Deal with v3s16 split into v2s16
-  LLT LCMTy = getLCMType(LLTy, PartLLT);
+  LLT LCMTy = getCoverTy(LLTy, PartLLT);
   if (LCMTy == LLTy) {
     // Common case where no padding is needed.
     assert(DstRegs.size() == 1);
@@ -267,21 +267,9 @@
   // widening the original value.
   Register UnmergeSrcReg;
   if (LCMTy != PartLLT) {
-    // e.g. A <3 x s16> value was split to <2 x s16>
-    // %register_value0:_(<2 x s16>)
-    // %register_value1:_(<2 x s16>)
-    // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
-    // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
-    // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
-    const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
-    Register Undef = B.buildUndef(PartLLT).getReg(0);
-
-    // Build vector of undefs.
-    SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
-    // Replace the first sources with the real registers.
-    std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
-    UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+    assert(DstRegs.size() == 1);
+    return B.buildDeleteTrailingVectorElements(DstRegs[0],
+                                               B.buildMerge(LCMTy, SrcRegs));
   } else {
     // We don't need to widen anything if we're extracting a scalar which was
     // promoted to a vector e.g. s8 -> v4s8 -> s8
@@ -298,6 +286,8 @@
   for (int I = DstRegs.size(); I != NumDst; ++I)
     PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

+  if (PadDstRegs.size() == 1)
+    return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
   return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
 }

@@ -485,7 +475,7 @@
   MachineRegisterInfo &MRI = *B.getMRI();
   LLT DstTy = MRI.getType(DstRegs[0]);
-  LLT LCMTy = getLCMType(SrcTy, PartTy);
+  LLT LCMTy = getCoverTy(SrcTy, PartTy);

   const unsigned DstSize = DstTy.getSizeInBits();
   const unsigned SrcSize = SrcTy.getSizeInBits();
@@ -493,7 +483,7 @@

   Register UnmergeSrc = SrcReg;

-  if (CoveringSize != SrcSize) {
+  if (!LCMTy.isVector() && CoveringSize != SrcSize) {
     // For scalars, it's common to be able to use a simple extension.
     if (SrcTy.isScalar() && DstTy.isScalar()) {
       CoveringSize = alignTo(SrcSize, DstSize);
@@ -510,14 +500,10 @@
     }
   }

-  // Unmerge to the original registers and pad with dead defs.
-  SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
-  for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
-       Size += DstSize) {
-    UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
-  }
+  if (LCMTy.isVector() && CoveringSize != SrcSize)
+    UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);

-  B.buildUnmerge(UnmergeResults, UnmergeSrc);
+  B.buildUnmerge(DstRegs, UnmergeSrc);
 }

 bool CallLowering::determineAndHandleAssignments(
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -176,16 +176,18 @@
     return true;
   }

+  // Perform irregular split. Leftover is last element of RegPieces.
   if (MainTy.isVector()) {
-    unsigned EltSize = MainTy.getScalarSizeInBits();
-    if (LeftoverSize % EltSize != 0)
-      return false;
-    LeftoverTy = LLT::scalarOrVector(
-        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
-  } else {
-    LeftoverTy = LLT::scalar(LeftoverSize);
+    SmallVector<Register, 8> RegPieces;
+    extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
+    for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
+      VRegs.push_back(RegPieces[i]);
+    LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
+    LeftoverTy = MRI.getType(LeftoverRegs[0]);
+    return true;
   }

+  LeftoverTy = LLT::scalar(LeftoverSize);
   // For irregular sizes, extract the individual parts.
   for (unsigned I = 0; I != NumParts; ++I) {
     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
@@ -203,6 +205,44 @@
   return true;
 }

+void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
+                                         SmallVectorImpl<Register> &VRegs) {
+  LLT RegTy = MRI.getType(Reg);
+  assert(RegTy.isVector() && "Expected a vector type");
+
+  LLT EltTy = RegTy.getElementType();
+  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+  unsigned RegNumElts = RegTy.getNumElements();
+  unsigned LeftoverNumElts = RegNumElts % NumElts;
+  unsigned NumNarrowTyPieces = RegNumElts / NumElts;
+
+  // Perfect split without leftover
+  if (LeftoverNumElts == 0)
+    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
+
+  // Irregular split. Provide direct access to all elements for artifact
+  // combiner using unmerge to elements. Then build vectors with NumElts
+  // elements. The remaining element(s) will be used to build the leftover.
+  SmallVector<Register, 8> Elts;
+  extractParts(Reg, EltTy, RegNumElts, Elts);
+
+  unsigned Offset = 0;
+  // Requested sub-vectors of NarrowTy.
+  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
+    ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
+    VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+  }
+
+  // Leftover element(s).
+  if (LeftoverNumElts == 1) {
+    VRegs.push_back(Elts[Offset]);
+  } else {
+    LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
+    ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
+    VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
+  }
+}
+
 void LegalizerHelper::insertParts(Register DstReg,
                                   LLT ResultTy, LLT PartTy,
                                   ArrayRef<Register> PartRegs,
@@ -223,6 +263,15 @@
     return;
   }

+  // Merge sub-vectors with different number of elements and insert into DstReg.
+  if (ResultTy.isVector()) {
+    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
+    SmallVector<Register, 8> AllRegs;
+    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
+      AllRegs.push_back(Reg);
+    return mergeMixedSubvectors(DstReg, AllRegs);
+  }
+
   SmallVector<Register, 8> GCDRegs;
   LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
   for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
@@ -231,6 +280,31 @@
   buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
 }

+void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
+                                       Register Reg) {
+  LLT Ty = MRI.getType(Reg);
+  SmallVector<Register, 8> RegElts;
+  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+  Elts.append(RegElts);
+}
+
+/// Merge \p PartRegs with different types into \p DstReg.
+void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
+                                           ArrayRef<Register> PartRegs) {
+  SmallVector<Register, 8> AllElts;
+  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
+    appendVectorElts(AllElts, PartRegs[i]);
+
+  Register Leftover = PartRegs[PartRegs.size() - 1];
+  if (MRI.getType(Leftover).isScalar())
+    AllElts.push_back(Leftover);
+  else
+    appendVectorElts(AllElts, Leftover);
+
+  MIRBuilder.buildMerge(DstReg, AllElts);
+}
+
 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                               const MachineInstr &MI) {
@@ -916,8 +990,26 @@
     return Legalized;
   }
-  case TargetOpcode::G_FREEZE:
-    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
+  case TargetOpcode::G_FREEZE: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    // Should widen scalar first
+    if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
+      return UnableToLegalize;
+
+    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+    SmallVector<Register, 8> Parts;
+    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
+      Parts.push_back(
+          MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
+    }
+
+    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_SUB:
   case TargetOpcode::G_SADDO:
@@ -1372,37 +1464,17 @@
                                             unsigned OpIdx) {
   MachineOperand &MO = MI.getOperand(OpIdx);
   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
-  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
+  Register Dst = MO.getReg();
+  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+  MO.setReg(DstExt);
+  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
 }

 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                             unsigned OpIdx) {
   MachineOperand &MO = MI.getOperand(OpIdx);
-
-  LLT OldTy = MRI.getType(MO.getReg());
-  unsigned OldElts = OldTy.getNumElements();
-  unsigned NewElts = MoreTy.getNumElements();
-
-  unsigned NumParts = NewElts / OldElts;
-
-  // Use concat_vectors if the result is a multiple of the number of elements.
-  if (NumParts * OldElts == NewElts) {
-    SmallVector<Register, 8> Parts;
-    Parts.push_back(MO.getReg());
-
-    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
-    for (unsigned I = 1; I != NumParts; ++I)
-      Parts.push_back(ImpDef);
-
-    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
-    MO.setReg(Concat.getReg(0));
-    return;
-  }
-
-  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
-  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
-  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
-  MO.setReg(MoreReg);
+  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
 }

 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
@@ -3558,20 +3630,81 @@
   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
 }

-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
-    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
-  Register DstReg = MI.getOperand(0).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  LLT LCMTy = getLCMType(DstTy, NarrowTy);
+/// Check that all vector operands have the same number of elements. Other
+/// operands should be listed in \p NonVecOpIndices.
+static bool hasSameNumEltsOnAllVectorOperands(
+    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
+    std::initializer_list<unsigned> NonVecOpIndices) {
+  if (MI.getNumMemOperands() != 0)
+    return false;

-  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+  LLT VecTy = MRI.getType(MI.getReg(0));
+  if (!VecTy.isVector())
+    return false;
+  unsigned NumElts = VecTy.getNumElements();

-  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
-  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+    MachineOperand &Op = MI.getOperand(OpIdx);
+    if (!Op.isReg()) {
+      if (!is_contained(NonVecOpIndices, OpIdx))
+        return false;
+      continue;
+    }

-  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
-  MI.eraseFromParent();
-  return Legalized;
+    LLT Ty = MRI.getType(Op.getReg());
+    if (!Ty.isVector()) {
+      if (!is_contained(NonVecOpIndices, OpIdx))
+        return false;
+      continue;
+    }
+
+    if (Ty.getNumElements() != NumElts)
+      return false;
+  }
+
+  return true;
+}
+
+/// Fill \p DstOps with DstOps that combined cover the same number of elements
+/// as \p Ty. These DstOps either have scalar type when \p NumElts = 1 or are
+/// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
+/// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
+static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
+                       unsigned NumElts) {
+  LLT LeftoverTy;
+  assert(Ty.isVector() && "Expected vector type");
+  LLT EltTy = Ty.getElementType();
+  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+  int NumParts, NumLeftover;
+  std::tie(NumParts, NumLeftover) =
+      getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
+
+  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
+  for (int i = 0; i < NumParts; ++i) {
+    DstOps.push_back(NarrowTy);
+  }
+
+  if (LeftoverTy.isValid()) {
+    assert(NumLeftover == 1 && "expected exactly one leftover");
+    DstOps.push_back(LeftoverTy);
+  }
+}
+
+/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
+/// made from \p Op depending on operand type.
+static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
+                           MachineOperand &Op) {
+  for (unsigned i = 0; i < N; ++i) {
+    if (Op.isReg())
+      Ops.push_back(Op.getReg());
+    else if (Op.isImm())
+      Ops.push_back(Op.getImm());
+    else if (Op.isPredicate())
+      Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
+    else
+      llvm_unreachable("Unsupported type");
+  }
 }

 // Handle splitting vector operations which need to have the same number of
@@ -3588,335 +3721,116 @@
 // s64 = G_SHL s64, s32
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorMultiEltType(
-    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
-  if (TypeIdx != 0)
-    return UnableToLegalize;
-
-  const LLT NarrowTy0 = NarrowTyArg;
-  const Register DstReg = MI.getOperand(0).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  LLT LeftoverTy0;
-
-  // All of the operands need to have the same number of elements, so if we can
-  // determine a type breakdown for the result type, we can for all of the
-  // source types.
-  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
-  if (NumParts < 0)
-    return UnableToLegalize;
-
-  SmallVector<MachineInstrBuilder, 4> NewInsts;
-
-  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
-  SmallVector<Register, 8> PartRegs, LeftoverRegs;
-
-  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
-    Register SrcReg = MI.getOperand(I).getReg();
-    LLT SrcTyI = MRI.getType(SrcReg);
-    const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
-                                            : ElementCount::getFixed(1);
-    LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
-    LLT LeftoverTyI;
-
-    // Split this operand into the requested typed registers, and any leftover
-    // required to reproduce the original type.
-    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
-                      LeftoverRegs))
-      return UnableToLegalize;
-
-    if (I == 1) {
-      // For the first operand, create an instruction for each part and setup
-      // the result.
-      for (Register PartReg : PartRegs) {
-        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
-        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
-                               .addDef(PartDstReg)
-                               .addUse(PartReg));
-        DstRegs.push_back(PartDstReg);
-      }
-
-      for (Register LeftoverReg : LeftoverRegs) {
-        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
-        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
-                               .addDef(PartDstReg)
-                               .addUse(LeftoverReg));
-        LeftoverDstRegs.push_back(PartDstReg);
-      }
+    GenericMachineInstr &MI, unsigned NumElts,
+    std::initializer_list<unsigned> NonVecOpIndices) {
+  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
+         "Non-compatible opcode or not specified non-vector operands");
+  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+  unsigned NumDefs = MI.getNumDefs();
+
+  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
+  // Build instructions with DstOps to use instruction found by CSE directly.
+  // CSE copies found instruction into given vreg when building with vreg dest.
+  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
+  // Output registers will be taken from created instructions.
+  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
+  for (unsigned i = 0; i < NumDefs; ++i) {
+    makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
+  }
+
+  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
+  // Operands listed in NonVecOpIndices will be used as is without splitting;
+  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
+  // scalar condition (op 1), immediate in sext_inreg (op 2).
+  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
+  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+       ++UseIdx, ++UseNo) {
+    if (is_contained(NonVecOpIndices, UseIdx)) {
+      broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
+                     MI.getOperand(UseIdx));
     } else {
-      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
-
-      // Add the newly created operand splits to the existing instructions. The
-      // odd-sized pieces are ordered after the requested NarrowTyArg sized
-      // pieces.
-      unsigned InstCount = 0;
-      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
-        NewInsts[InstCount++].addUse(PartRegs[J]);
-      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
-        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
+      SmallVector<Register, 8> SplitPieces;
+      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+      for (auto Reg : SplitPieces)
+        InputOpsPieces[UseNo].push_back(Reg);
     }
-
-    PartRegs.clear();
-    LeftoverRegs.clear();
   }

-  // Insert the newly built operations and rebuild the result register.
-  for (auto &MIB : NewInsts)
-    MIRBuilder.insertInstr(MIB);
+  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

-  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+  // Take i-th piece of each input operand split and build sub-vector/scalar
+  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
+  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+    SmallVector<DstOp, 2> Defs;
+    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+      Defs.push_back(OutputOpsPieces[DstNo][i]);

-  MI.eraseFromParent();
-  return Legalized;
-}
+    SmallVector<SrcOp, 3> Uses;
+    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
+      Uses.push_back(InputOpsPieces[InputNo][i]);

-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
-                                          LLT NarrowTy) {
-  if (TypeIdx != 0)
-    return UnableToLegalize;
-
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  LLT SrcTy = MRI.getType(SrcReg);
-
-  LLT NarrowTy0 = NarrowTy;
-  LLT NarrowTy1;
-  unsigned NumParts;
-
-  if (NarrowTy.isVector()) {
-    // Uneven breakdown not handled.
-    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
-    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
-      return UnableToLegalize;
-
-    NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
-  } else {
-    NumParts = DstTy.getNumElements();
-    NarrowTy1 = SrcTy.getElementType();
+    auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
+    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+      OutputRegs[DstNo].push_back(I.getReg(DstNo));
   }

-  SmallVector<Register, 8> SrcRegs, DstRegs;
-  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
-
-  for (unsigned I = 0; I < NumParts; ++I) {
-    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
-    MachineInstr *NewInst =
-        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
-
-    NewInst->setFlags(MI.getFlags());
-    DstRegs.push_back(DstReg);
+  // Merge small outputs into MI's output for each def operand.
+  if (NumLeftovers) {
+    for (unsigned i = 0; i < NumDefs; ++i)
+      mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
+  } else {
+    for (unsigned i = 0; i < NumDefs; ++i)
+      MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
   }

-  if (NarrowTy.isVector())
-    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
-  else
-    MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
   MI.eraseFromParent();
   return Legalized;
 }

 LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
-                                        LLT NarrowTy) {
-  Register DstReg = MI.getOperand(0).getReg();
-  Register Src0Reg = MI.getOperand(2).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  LLT SrcTy = MRI.getType(Src0Reg);
-
-  unsigned NumParts;
-  LLT NarrowTy0, NarrowTy1;
-
-  if (TypeIdx == 0) {
-    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
-    unsigned OldElts = DstTy.getNumElements();
-
-    NarrowTy0 = NarrowTy;
-    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
-    NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
-                                                  SrcTy.getScalarSizeInBits())
-                                    : SrcTy.getElementType();
-
-  } else {
-    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
-    unsigned OldElts = SrcTy.getNumElements();
-
-    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
-      NarrowTy.getNumElements();
-    NarrowTy0 =
-        LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
-    NarrowTy1 = NarrowTy;
+LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
+                                        unsigned NumElts) {
+  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+  unsigned NumDefs = MI.getNumDefs();
+
+  SmallVector<DstOp, 8> OutputOpsPieces;
+  SmallVector<Register, 8> OutputRegs;
+  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
+
+  // Instructions that perform register split will be inserted in basic block
+  // where register is defined (basic block is in the next operand).
+  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
+  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+       UseIdx += 2, ++UseNo) {
+    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
+    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
   }

-  // FIXME: Don't know how to handle the situation where the small vectors
-  // aren't all the same size yet.
-  if (NarrowTy1.isVector() &&
-      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
-    return UnableToLegalize;
-
-  CmpInst::Predicate Pred
-    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-
-  SmallVector<Register, 8> Src1Regs, Src2Regs, DstRegs;
-  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
-  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+  // Build PHIs with fewer elements.
+  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
+  MIRBuilder.setInsertPt(*MI.getParent(), MI);
+  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
+    Phi.addDef(
+        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
+    OutputRegs.push_back(Phi.getReg(0));

-  for (unsigned I = 0; I < NumParts; ++I) {
-    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
-    DstRegs.push_back(DstReg);
-
-    if (MI.getOpcode() == TargetOpcode::G_ICMP)
-      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
-    else {
-      MachineInstr *NewCmp
-        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
-      NewCmp->setFlags(MI.getFlags());
+    for (unsigned j = 0; j < NumInputs / 2; ++j) {
+      Phi.addUse(InputOpsPieces[j][i]);
+      Phi.add(MI.getOperand(1 + j * 2 + 1));
     }
   }

-  if (NarrowTy1.isVector())
-    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
-  else
-    MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
-  MI.eraseFromParent();
-  return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
-                                           LLT NarrowTy) {
-  Register DstReg = MI.getOperand(0).getReg();
-  Register CondReg = MI.getOperand(1).getReg();
-
-  unsigned NumParts = 0;
-  LLT NarrowTy0, NarrowTy1;
-
-  LLT DstTy = MRI.getType(DstReg);
-  LLT CondTy = MRI.getType(CondReg);
-  unsigned Size = DstTy.getSizeInBits();
-
-  assert(TypeIdx == 0 || CondTy.isVector());
-
-  if (TypeIdx == 0) {
-    NarrowTy0 = NarrowTy;
-    NarrowTy1 = CondTy;
-
-    unsigned NarrowSize = NarrowTy0.getSizeInBits();
-    // FIXME: Don't know how to handle the situation where the small vectors
-    // aren't all the same size yet.
-    if (Size % NarrowSize != 0)
-      return UnableToLegalize;
-
-    NumParts = Size / NarrowSize;
-
-    // Need to break down the condition type
-    if (CondTy.isVector()) {
-      if (CondTy.getNumElements() == NumParts)
-        NarrowTy1 = CondTy.getElementType();
-      else
-        NarrowTy1 =
-            LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
-                        CondTy.getScalarSizeInBits());
-    }
+  // Merge small outputs into MI's def.
+  if (NumLeftovers) {
+    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
   } else {
-    NumParts = CondTy.getNumElements();
-    if (NarrowTy.isVector()) {
-      // TODO: Handle uneven breakdown.
-      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
-        return UnableToLegalize;
-
-      return UnableToLegalize;
-    } else {
-      NarrowTy0 = DstTy.getElementType();
-      NarrowTy1 = NarrowTy;
-    }
-  }
-
-  SmallVector<Register, 8> DstRegs, Src0Regs, Src1Regs, Src2Regs;
-  if (CondTy.isVector())
-    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
-
-  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
-  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
-
-  for (unsigned i = 0; i < NumParts; ++i) {
-    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
-    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
-                           Src1Regs[i], Src2Regs[i]);
-    DstRegs.push_back(DstReg);
-  }
-
-  if (NarrowTy0.isVector())
-    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
-  else
-    MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
-  MI.eraseFromParent();
-  return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
-                                        LLT NarrowTy) {
-  const Register DstReg = MI.getOperand(0).getReg();
-  LLT PhiTy = MRI.getType(DstReg);
-  LLT LeftoverTy;
-
-  // All of the operands need to have the same number of elements, so if we can
-  // determine a type breakdown for the result type, we can for all of the
-  // source types.
-  int NumParts, NumLeftover;
-  std::tie(NumParts, NumLeftover)
-    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
-  if (NumParts < 0)
-    return UnableToLegalize;
-
-  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
-  SmallVector<MachineInstrBuilder, 4> NewInsts;
-
-  const int TotalNumParts = NumParts + NumLeftover;
-
-  // Insert the new phis in the result block first.
-  for (int I = 0; I != TotalNumParts; ++I) {
-    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
-    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
-    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
-                           .addDef(PartDstReg));
-    if (I < NumParts)
-      DstRegs.push_back(PartDstReg);
-    else
-      LeftoverDstRegs.push_back(PartDstReg);
-  }
-
-  MachineBasicBlock *MBB = MI.getParent();
-  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
-  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
-
-  SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
-  // Insert code to extract the incoming values in each predecessor block.
-  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
-    PartRegs.clear();
-    LeftoverRegs.clear();
-
-    Register SrcReg = MI.getOperand(I).getReg();
-    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
-    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
-
-    LLT Unused;
-    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
-                      LeftoverRegs))
-      return UnableToLegalize;
-
-    // Add the newly created operand splits to the existing instructions. The
-    // odd-sized pieces are ordered after the requested NarrowTyArg sized
-    // pieces.
-    for (int J = 0; J != TotalNumParts; ++J) {
-      MachineInstrBuilder MIB = NewInsts[J];
-      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
-      MIB.addMBB(&OpMBB);
-    }
+    MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
   }

   MI.eraseFromParent();
@@ -3927,27 +3841,36 @@
 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                   unsigned TypeIdx,
                                                   LLT NarrowTy) {
-  if (TypeIdx != 1)
-    return UnableToLegalize;
-
   const int NumDst = MI.getNumOperands() - 1;
   const Register SrcReg = MI.getOperand(NumDst).getReg();
-  LLT SrcTy = MRI.getType(SrcReg);
-
   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT SrcTy = MRI.getType(SrcReg);

-  // TODO: Create sequence of extracts.
-  if (DstTy == NarrowTy)
+  if (TypeIdx != 1 || NarrowTy == DstTy)
     return UnableToLegalize;

-  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
-  if (DstTy == GCDTy) {
-    // This would just be a copy of the same unmerge.
-    // TODO: Create extracts, pad with undef and create intermediate merges.
+  // Requires compatible types. Otherwise SrcReg should have been defined by
+  // merge-like instruction that would get artifact combined. Most likely
+  // instruction that defines SrcReg has to perform more/fewer elements
+  // legalization compatible with NarrowTy.
+  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+
+  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+      (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
     return UnableToLegalize;
-  }

-  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+  // This is most likely DstTy (smaller than register size) packed in SrcTy
+  // (larger than register size) and since unmerge was not combined it will be
+  // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
+  // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
+
+  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
+  //
+  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
+  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
+  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
+  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);

   const int NumUnmerge = Unmerge->getNumOperands() - 1;
   const int PartsPerUnmerge = NumDst / NumUnmerge;
@@ -3963,90 +3886,88 @@
   return Legalized;
 }

-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
-                                         LLT NarrowTy) {
-  Register Result = MI.getOperand(0).getReg();
-  Register Overflow = MI.getOperand(1).getReg();
-  Register LHS = MI.getOperand(2).getReg();
-  Register RHS = MI.getOperand(3).getReg();
-
-  LLT SrcTy = MRI.getType(LHS);
-  if (!SrcTy.isVector())
-    return UnableToLegalize;
-
-  LLT ElementType = SrcTy.getElementType();
-  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
-  const ElementCount NumResult = SrcTy.getElementCount();
-  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
-
-  // Unmerge the operands to smaller parts of GCD type.
-  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
-  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
-
-  const int NumOps = UnmergeLHS->getNumOperands() - 1;
-  const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
-  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
-  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
-
-  // Perform the operation over unmerged parts.
-  SmallVector<Register, 8> ResultParts;
-  SmallVector<Register, 8> OverflowParts;
-  for (int I = 0; I != NumOps; ++I) {
-    Register Operand1 = UnmergeLHS->getOperand(I).getReg();
-    Register Operand2 = UnmergeRHS->getOperand(I).getReg();
-    auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
-                                         {Operand1, Operand2});
-    ResultParts.push_back(PartMul->getOperand(0).getReg());
-    OverflowParts.push_back(PartMul->getOperand(1).getReg());
-  }
-
-  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
-  LLT OverflowLCMTy =
-      LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
-
-  // Recombine the pieces to the original result and overflow registers.
-  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
-  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
-  MI.eraseFromParent();
-  return Legalized;
-}
-
-// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
-// a vector
-//
-// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
-// undef as necessary.
-//
-// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
-//   -> <2 x s16>
-//
-// %4:_(s16) = G_IMPLICIT_DEF
-// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
-// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
-// %7:_(<2 x s16>) = G_IMPLICIT_DEF
-// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
-// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
   Register DstReg = MI.getOperand(0).getReg();
   LLT DstTy = MRI.getType(DstReg);
   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
-  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+  // Requires compatible types. Otherwise user of DstReg did not perform unmerge
+  // that should have been artifact combined. Most likely instruction that uses
+  // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
+  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+  if (NarrowTy == SrcTy)
+    return UnableToLegalize;

-  // Break into a common type
-  SmallVector<Register, 8> Parts;
-  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
-    extractGCDType(Parts, GCDTy, MO.getReg());
+  // This attempts to lower part of the LCMTy merge/unmerge sequence. Intended
+  // use is for old mir tests. Since the changes to more/fewer elements, it
+  // should no longer be possible to generate MIR like this when starting from
+  // llvm-ir because the LCMTy approach was replaced with merge/unmerge to
+  // vector elements.
+  if (TypeIdx == 1) {
+    assert(SrcTy.isVector() && "Expected vector types");
+    assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+    if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+        (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
+      return UnableToLegalize;
+    // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
+    //
+    // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
+    // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
+    // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
+    // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
+    // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
+    // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
+
+    SmallVector<Register, 8> Elts;
+    LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
+    for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
+      auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
+      for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
+        Elts.push_back(Unmerge.getReg(j));
+    }

-  // Build the requested new merge, padding with undef.
-  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
-                                  TargetOpcode::G_ANYEXT);
+    SmallVector<Register, 8> NarrowTyElts;
+    unsigned NumNarrowTyElts = NarrowTy.getNumElements();
+    unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
+    for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
+         ++i, Offset += NumNarrowTyElts) {
+      ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
+      NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+    }

-  // Pack into the original result register.
-  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+    MIRBuilder.buildMerge(DstReg, NarrowTyElts);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  assert(TypeIdx == 0 && "Bad type index");
+  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
+      (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
+    return UnableToLegalize;
+
+  // This is most likely SrcTy (smaller than register size) packed in DstTy
+  // (larger than register size) and since merge was not combined it will be
+  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
+  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
+
+  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
+  //
+  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
+  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
+  // %0:_(DstTy)  = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
+  SmallVector<Register, 8> NarrowTyElts;
+  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
+  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
+  for (unsigned i = 0; i < NumParts; ++i) {
+    SmallVector<Register, 8> Sources;
+    for (unsigned j = 0; j < NumElts; ++j)
+      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
+    NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
+  }
+
+  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
   MI.eraseFromParent();
   return Legalized;
 }
@@ -4217,164 +4138,15 @@
   return Legalized;
 }

-LegalizerHelper::LegalizeResult
-LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
-                                      LLT NarrowTy) {
-  assert(TypeIdx == 0 && "only one type index expected");
-
-  const unsigned Opc = MI.getOpcode();
-  const int NumDefOps = MI.getNumExplicitDefs();
-  const int NumSrcOps = MI.getNumOperands() - NumDefOps;
-  const unsigned Flags = MI.getFlags();
-  const unsigned NarrowSize = NarrowTy.getSizeInBits();
-  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
-
-  assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
-                                     "result and 1-3 sources or 2 results and "
-                                     "1-2 sources");
-
-  SmallVector<Register, 2> DstRegs;
-  for (int I = 0; I < NumDefOps; ++I)
-    DstRegs.push_back(MI.getOperand(I).getReg());
-
-  // First of all check whether we are narrowing (changing the element type)
-  // or reducing the vector elements
-  const LLT DstTy = MRI.getType(DstRegs[0]);
-  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
-
-  SmallVector<Register, 8> ExtractedRegs[3];
-  SmallVector<Register, 8> Parts;
-
-  // Break down all the sources into NarrowTy pieces we can operate on. This may
-  // involve creating merges to a wider type, padded with undef.
-  for (int I = 0; I != NumSrcOps; ++I) {
-    Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
-    LLT SrcTy = MRI.getType(SrcReg);
-
-    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
-    // For fewerElements, this is a smaller vector with the same element type.
-    LLT OpNarrowTy;
-    if (IsNarrow) {
-      OpNarrowTy = NarrowScalarTy;
-
-      // In case of narrowing, we need to cast vectors to scalars for this to
-      // work properly
-      // FIXME: Can we do without the bitcast here if we're narrowing?
-      if (SrcTy.isVector()) {
-        SrcTy = LLT::scalar(SrcTy.getSizeInBits());
-        SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
-      }
-    } else {
-      auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
-                                          : ElementCount::getFixed(1);
-      OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
-    }
-
-    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
-
-    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
-    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
-                        TargetOpcode::G_ANYEXT);
-  }
-
-  SmallVector<Register, 8> ResultRegs[2];
-
-  // Input operands for each sub-instruction.
-  SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
-
-  int NumParts = ExtractedRegs[0].size();
-  const unsigned DstSize = DstTy.getSizeInBits();
-  const LLT DstScalarTy = LLT::scalar(DstSize);
-
-  // Narrowing needs to use scalar types
-  LLT DstLCMTy, NarrowDstTy;
-  if (IsNarrow) {
-    DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
-    NarrowDstTy = NarrowScalarTy;
-  } else {
-    DstLCMTy = getLCMType(DstTy, NarrowTy);
-    NarrowDstTy = NarrowTy;
-  }
-
-  // We widened the source registers to satisfy merge/unmerge size
-  // constraints. We'll have some extra fully undef parts.
-  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
-
-  for (int I = 0; I != NumRealParts; ++I) {
-    // Emit this instruction on each of the split pieces.
-    for (int J = 0; J != NumSrcOps; ++J)
-      InputRegs[J] = ExtractedRegs[J][I];
-
-    MachineInstrBuilder Inst;
-    if (NumDefOps == 1)
-      Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
-    else
-      Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
-                                   Flags);
-
-    for (int J = 0; J != NumDefOps; ++J)
-      ResultRegs[J].push_back(Inst.getReg(J));
-  }
-
-  // Fill out the widened result with undef instead of creating instructions
-  // with undef inputs.
-  int NumUndefParts = NumParts - NumRealParts;
-  if (NumUndefParts != 0) {
-    Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
-    for (int I = 0; I != NumDefOps; ++I)
-      ResultRegs[I].append(NumUndefParts, Undef);
-  }
-
-  // Extract the possibly padded result. Use a scratch register if we need to do
-  // a final bitcast, otherwise use the original result register.
-  Register MergeDstReg;
-  for (int I = 0; I != NumDefOps; ++I) {
-    if (IsNarrow && DstTy.isVector())
-      MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
-    else
-      MergeDstReg = DstRegs[I];
-
-    buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
-
-    // Recast to vector if we narrowed a vector
-    if (IsNarrow && DstTy.isVector())
-      MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
-  }
-
-  MI.eraseFromParent();
-  return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
-                                              LLT NarrowTy) {
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  int64_t Imm = MI.getOperand(2).getImm();
-
-  LLT DstTy = MRI.getType(DstReg);
-
-  SmallVector<Register, 8> Parts;
-  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
-  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
-
-  for (Register &R : Parts)
-    R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
-
-  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
-
-  MI.eraseFromParent();
-  return Legalized;
-}
-
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
   using namespace TargetOpcode;
+  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
+  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

   switch (MI.getOpcode()) {
   case G_IMPLICIT_DEF:
-    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
   case G_TRUNC:
   case G_AND:
   case G_OR:
@@ -4439,10 +4211,8 @@
   case G_SSUBSAT:
   case G_UADDSAT:
   case G_USUBSAT:
-    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
   case G_UMULO:
   case G_SMULO:
-    return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
   case G_SHL:
   case G_LSHR:
   case G_ASHR:
@@ -4454,7 +4224,6 @@
   case G_CTTZ_ZERO_UNDEF:
   case G_CTPOP:
   case G_FCOPYSIGN:
-    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
   case G_ZEXT:
   case G_SEXT:
   case G_ANYEXT:
@@ -4467,14 +4236,16 @@
   case G_INTTOPTR:
   case G_PTRTOINT:
   case G_ADDRSPACE_CAST:
-    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+    return fewerElementsVectorMultiEltType(GMI, NumElts);
   case G_ICMP:
   case G_FCMP:
-    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
   case G_SELECT:
-    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+      return fewerElementsVectorMultiEltType(GMI, NumElts);
+    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
   case G_PHI:
-    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+    return fewerElementsVectorPhi(GMI, NumElts);
   case G_UNMERGE_VALUES:
     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
   case G_BUILD_VECTOR:
@@ -4491,7 +4262,7 @@
   case G_STORE:
     return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
   case G_SEXT_INREG:
-    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
+    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
   GISEL_VECREDUCE_CASES_NONSEQ
     return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
   case G_SHUFFLE_VECTOR:
@@ -5053,6 +4824,15 @@
   case TargetOpcode::G_AND:
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR:
+  case TargetOpcode::G_ADD:
+  case TargetOpcode::G_SUB:
+  case TargetOpcode::G_MUL:
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_UADDSAT:
+  case TargetOpcode::G_USUBSAT:
+  case TargetOpcode::G_SADDSAT:
+  case TargetOpcode::G_SSUBSAT:
   case TargetOpcode::G_SMIN:
   case TargetOpcode::G_SMAX:
   case TargetOpcode::G_UMIN:
@@ -5070,6 +4850,17 @@
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_FMA:
+  case TargetOpcode::G_FSHR:
+  case TargetOpcode::G_FSHL: {
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 1);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorSrc(MI, MoreTy, 3);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   case TargetOpcode::G_EXTRACT:
     if (TypeIdx != 1)
       return UnableToLegalize;
    Observer.changingInstr(MI);
@@ -5079,6 +4870,11 @@
    return Legalized;
   case TargetOpcode::G_INSERT:
   case TargetOpcode::G_FREEZE:
+  case TargetOpcode::G_FNEG:
+  case TargetOpcode::G_FABS:
+  case TargetOpcode::G_BSWAP:
+  case TargetOpcode::G_FCANONICALIZE:
+  case TargetOpcode::G_SEXT_INREG:
     if (TypeIdx != 0)
       return UnableToLegalize;
     Observer.changingInstr(MI);
@@ -5098,30 +4894,34 @@
     moreElementsVectorDst(MI, MoreTy, 0);
     Observer.changedInstr(MI);
     return Legalized;
-  case TargetOpcode::G_UNMERGE_VALUES: {
-    if (TypeIdx != 1)
-      return UnableToLegalize;
-
-    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
-    int NumDst = MI.getNumOperands() - 1;
-    moreElementsVectorSrc(MI, MoreTy, NumDst);
-
-    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
-    for (int I = 0; I != NumDst; ++I)
-      MIB.addDef(MI.getOperand(I).getReg());
+  case TargetOpcode::G_UNMERGE_VALUES:
+    return UnableToLegalize;
+  case TargetOpcode::G_PHI:
+    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+  case TargetOpcode::G_SHUFFLE_VECTOR:
+    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+  case TargetOpcode::G_BUILD_VECTOR: {
+    SmallVector<SrcOp, 8> Elts;
+    for (auto Op : MI.uses()) {
+      Elts.push_back(Op.getReg());
+    }

-    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
-    for (int I = NumDst; I != NewNumDst; ++I)
-      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+    for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
+      Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
+    }

-    MIB.addUse(MI.getOperand(NumDst).getReg());
+    MIRBuilder.buildDeleteTrailingVectorElements(
+        MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
     MI.eraseFromParent();
     return Legalized;
   }
-  case TargetOpcode::G_PHI:
-    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
-  case TargetOpcode::G_SHUFFLE_VECTOR:
-    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+  case TargetOpcode::G_TRUNC: {
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 1);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   default:
     return UnableToLegalize;
   }
@@ -6778,6 +6578,24 @@
   LLT VecTy = MRI.getType(SrcVec);
   LLT EltTy = VecTy.getElementType();
+  unsigned NumElts = VecTy.getNumElements();
+
+  int64_t IdxVal;
+  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal < NumElts) {
+    SmallVector<Register, 8> SrcRegs;
+    extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+
+    if (InsertVal) {
+      SrcRegs[IdxVal] = MI.getOperand(2).getReg();
+      MIRBuilder.buildMerge(DstReg, SrcRegs);
+    } else {
+      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
+    }
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
   if (!EltTy.isByteSized()) { // Not implemented.
     LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
     return UnableToLegalize;
   }
@@ -6796,7 +6614,6 @@
   // if the index is out of bounds.
   Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

-  int64_t IdxVal;
   if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
     int64_t Offset = IdxVal * EltBytes;
     PtrInfo = PtrInfo.getWithOffset(Offset);
@@ -6923,6 +6740,32 @@
   LLT DstTy = MRI.getType(Dst);
   LLT SrcTy = MRI.getType(Src);

+  // Extract sub-vector or one element
+  if (SrcTy.isVector()) {
+    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+    unsigned DstSize = DstTy.getSizeInBits();
+
+    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
+        (Offset + DstSize <= SrcTy.getSizeInBits())) {
+      // Unmerge and allow access to each Src element for the artifact combiner.
+      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+
+      // Take element(s) we need to extract and copy it (merge them).
+      SmallVector<Register, 8> SubVectorElts;
+      for (unsigned Idx = Offset / SrcEltSize;
+           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
+        SubVectorElts.push_back(Unmerge.getReg(Idx));
+      }
+      if (SubVectorElts.size() == 1)
+        MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+      else
+        MIRBuilder.buildMerge(Dst, SubVectorElts);
+
+      MI.eraseFromParent();
+      return Legalized;
+    }
+  }
+
   if (DstTy.isScalar() &&
       (SrcTy.isScalar() ||
        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
@@ -6956,6 +6799,45 @@
   LLT DstTy = MRI.getType(Src);
   LLT InsertTy = MRI.getType(InsertSrc);

+  // Insert sub-vector or one element
+  if (DstTy.isVector() && !InsertTy.isPointer()) {
+    LLT EltTy = DstTy.getElementType();
+    unsigned EltSize = EltTy.getSizeInBits();
+    unsigned InsertSize = InsertTy.getSizeInBits();
+
+    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
+        (Offset + InsertSize <= DstTy.getSizeInBits())) {
+      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
+      SmallVector<Register, 8> DstElts;
+      unsigned Idx = 0;
+      // Elements from Src before the insertion point Offset
+      for (; Idx < Offset / EltSize; ++Idx) {
+        DstElts.push_back(UnmergeSrc.getReg(Idx));
+      }
+
+      // Replace elements in Src with elements from InsertSrc
+      if (InsertTy.getSizeInBits() > EltSize) {
+        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
+        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
+             ++Idx, ++i) {
+          DstElts.push_back(UnmergeInsertSrc.getReg(i));
+        }
+      } else {
+        DstElts.push_back(InsertSrc);
+        ++Idx;
+      }
+
+      // Remaining elements from Src after the insert
+      for (; Idx < DstTy.getNumElements(); ++Idx) {
+        DstElts.push_back(UnmergeSrc.getReg(Idx));
+      }
+
+      MIRBuilder.buildMerge(Dst, DstElts);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+  }
+
   if (InsertTy.isVector() ||
       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
     return UnableToLegalize;
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -215,6 +215,48 @@
   return buildPtrMask(Res, Op0, MaskReg);
 }

+MachineInstrBuilder
+MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
+                                                  const SrcOp &Op0) {
+  LLT ResTy = Res.getLLTTy(*getMRI());
+  LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+  assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+  assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+         "Different vector element types");
+  assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+         "Op0 has more elements");
+
+  auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+  SmallVector<Register, 8> Regs;
+  for (auto Op : Unmerge.getInstr()->defs())
+    Regs.push_back(Op.getReg());
+  Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+  unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
+  for (unsigned i = 0; i < NumberOfPadElts; ++i)
+    Regs.push_back(Undef);
+  return buildMerge(Res, Regs);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
+                                                    const SrcOp &Op0) {
+  LLT ResTy = Res.getLLTTy(*getMRI());
+  LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+  assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+  assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+         "Different vector element types");
+  assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
+         "Op0 has fewer elements");
+
+  SmallVector<Register, 8> Regs;
+  auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+  for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
ResTy.getNumElements(); ++i) + Regs.push_back(Unmerge.getReg(i)); + return buildMerge(Res, Regs); +} + MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } @@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) { unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits(); - SmallVector TmpVec; - for (unsigned I = 0; I != NumReg; ++I) - TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res)); - return buildUnmerge(TmpVec, Op); + SmallVector TmpVec(NumReg, Res); + return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef Res, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -923,6 +923,21 @@ return LLT::scalar(LCMSize); } +LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) { + if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy || + (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits())) + return getLCMType(OrigTy, TargetTy); + + unsigned OrigTyNumElts = OrigTy.getNumElements(); + unsigned TargetTyNumElts = TargetTy.getNumElements(); + if (OrigTyNumElts % TargetTyNumElts == 0) + return OrigTy; + + unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts); + return LLT::scalarOrVector(ElementCount::getFixed(NumElts), + OrigTy.getElementType()); +} + LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { const unsigned OrigSize = OrigTy.getSizeInBits(); const unsigned TargetSize = TargetTy.getSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -533,7 +533,7 @@ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16, V2S16}) .minScalar(0, S16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .widenScalarToNextMultipleOf(0, 32) .maxScalar(0, S32) .scalarize(0); @@ -541,7 +541,7 @@ getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) .legalFor({S32, S16, V2S16}) // Clamp modifier .minScalarOrElt(0, S16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .widenScalarToNextPow2(0, 32) .lower(); @@ -712,7 +712,7 @@ } if (ST.hasVOP3PInsts()) - FPOpActions.clampMaxNumElements(0, S16, 2); + FPOpActions.clampMaxNumElementsStrict(0, S16, 2); FPOpActions .scalarize(0) @@ -728,7 +728,7 @@ getActionDefinitionsBuilder({G_FNEG, G_FABS}) .legalFor(FPTypesPK16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .clampScalar(0, S16, S64); @@ -965,7 +965,7 @@ if (ST.has16BitInsts()) { getActionDefinitionsBuilder(G_BSWAP) .legalFor({S16, S32, V2S16}) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) // FIXME: Fixing non-power-of-2 before clamp is workaround for // narrowScalar limitation. .widenScalarToNextPow2(0) @@ -1425,6 +1425,13 @@ // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + .lowerIf([=](const LegalityQuery &Query) { + // Sub-vector(or single element) insert and extract. + // TODO: verify immediate offset here since lower only works with + // whole elements. 
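Every clampMaxNumElements call replaced above now uses the strict variant, which is the MoreElements-then-FewerElements pairing declared in the header. A hedged illustration of what the strict rule means for an odd-sized type; the opcode and the <7 x s16> walkthrough are assumptions for exposition, not copied from any test in this patch:

// clampMaxNumElementsStrict(0, S16, 2) is alignNumElementsTo(0, S16, 2)
// followed by clampMaxNumElements(0, S16, 2). For a <7 x s16> G_ADD this
// first pads the operands with one undef element to <8 x s16>, then splits
// into four uniform <2 x s16> adds. The non-strict clamp alone would instead
// produce three <2 x s16> adds plus a leftover s16 add.
getActionDefinitionsBuilder(G_ADD)
    .legalFor({V2S16})
    .clampMaxNumElementsStrict(0, S16, 2)
    .scalarize(0);

The strict form trades a couple of undef lanes for uniform sub-instruction types, which keeps the per-opcode splitting logic free of leftover special cases.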
@@ -1425,6 +1425,13 @@
     // FIXME: Doesn't handle extract of illegal sizes.
     getActionDefinitionsBuilder(Op)
       .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
+      .lowerIf([=](const LegalityQuery &Query) {
+        // Sub-vector (or single element) insert and extract.
+        // TODO: verify immediate offset here since lower only works with
+        // whole elements.
+        const LLT BigTy = Query.Types[BigTyIdx];
+        return BigTy.isVector();
+      })
      // FIXME: Multiples of 16 should not be legal.
      .legalIf([=](const LegalityQuery &Query) {
        const LLT BigTy = Query.Types[BigTyIdx];
@@ -1583,7 +1590,7 @@
       // Prefer to reduce vector widths for 16-bit vectors before lowering, to
       // get more vector shift opportunities, since we'll get those when
       // expanded.
-      .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16));
+      .clampMaxNumElementsStrict(0, S16, 2);
   } else if (ST.has16BitInsts()) {
     SextInReg.lowerFor({{S32}, {S64}, {S16}});
   } else {
@@ -1605,14 +1612,14 @@
   getActionDefinitionsBuilder(G_FSHR)
     .legalFor({{S32, S32}})
     .lowerFor({{V2S16, V2S16}})
-    .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
+    .clampMaxNumElementsStrict(0, S16, 2)
    .scalarize(0)
    .lower();
 
   if (ST.hasVOP3PInsts()) {
     getActionDefinitionsBuilder(G_FSHL)
       .lowerFor({{V2S16, V2S16}})
-      .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
+      .clampMaxNumElementsStrict(0, S16, 2)
      .scalarize(0)
      .lower();
   } else {
@@ -2535,10 +2542,8 @@
   } else {
     // For cases where the widened type isn't a nice register value, unmerge
     // from a widened register (e.g. <3 x s16> -> <4 x s16>)
-    B.setInsertPt(B.getMBB(), ++B.getInsertPt());
-    WideLoad = Helper.widenWithUnmerge(WideTy, ValReg);
-    B.setInsertPt(B.getMBB(), MI.getIterator());
-    B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0);
+    WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+    B.buildDeleteTrailingVectorElements(ValReg, WideLoad);
   }
 }
@@ -3811,6 +3816,10 @@
     llvm_unreachable("invalid data type");
   }
 
+  if (StoreVT == LLT::fixed_vector(3, S16)) {
+    Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg)
+              .getReg(0);
+  }
   return Reg;
 }
 
@@ -4653,9 +4662,23 @@
   // Deal with the one annoying legal case.
   const LLT V3S16 = LLT::fixed_vector(3, 16);
   if (Ty == V3S16) {
-    padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1);
-    auto Concat = B.buildConcatVectors(LLT::fixed_vector(6, 16), ResultRegs);
-    B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat);
+    if (IsTFE) {
+      if (ResultRegs.size() == 1) {
+        NewResultReg = ResultRegs[0];
+      } else if (ResultRegs.size() == 2) {
+        LLT V4S16 = LLT::fixed_vector(4, 16);
+        NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0);
+      } else {
+        return false;
+      }
+    }
+
+    if (MRI->getType(DstReg).getNumElements() <
+        MRI->getType(NewResultReg).getNumElements()) {
+      B.buildDeleteTrailingVectorElements(DstReg, NewResultReg);
+    } else {
+      B.buildPadVectorWithUndefElements(DstReg, NewResultReg);
+    }
     return true;
   }
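The AMDGPU call sites above use the two new MachineIRBuilder helpers as an inverse pair around a width made legal by padding. A minimal sketch of the round trip, assuming a <3 x s16> value that the target can only operate on as <4 x s16>; the function and register names are illustrative, only the two builder calls come from this patch:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Pad <3 x s16> up to the legal <4 x s16>, then drop the trailing lane.
static Register padOperateShrinkSketch(MachineIRBuilder &B, Register Narrow) {
  LLT V3S16 = LLT::fixed_vector(3, 16);
  LLT V4S16 = LLT::fixed_vector(4, 16);
  // Emits G_UNMERGE_VALUES, one G_IMPLICIT_DEF lane, and a merge.
  Register Wide = B.buildPadVectorWithUndefElements(V4S16, Narrow).getReg(0);
  // ... operate on Wide at the legal <4 x s16> type ...
  // Keeps only the first three lanes: G_UNMERGE_VALUES plus a merge.
  return B.buildDeleteTrailingVectorElements(V3S16, Wide).getReg(0);
}

Because both helpers expand to plain unmerge/merge artifacts, the artifact combiner can usually fold the pad and shrink away entirely, which is where the simpler build_vector sequences in the updated tests come from.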
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1162,18 +1162,25 @@
     // 96-bit loads are only available for vector loads. We need to split this
     // into a 64-bit part, and 32 (unless we can widen to a 128-bit load).
     if (MMO->getAlign() < Align(16)) {
+      MachineFunction *MF = MI.getParent()->getParent();
+      ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
+      MachineIRBuilder B(MI, ApplyBank);
+      LegalizerHelper Helper(*MF, ApplyBank, B);
       LLT Part64, Part32;
       std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
-      auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0);
-      auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8);
-
-      auto Undef = B.buildUndef(LoadTy);
-      auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0);
-      B.buildInsert(MI.getOperand(0), Ins0, Load1, 64);
+      if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
+          LegalizerHelper::Legalized)
+        return false;
+      return true;
     } else {
       LLT WiderTy = widen96To128(LoadTy);
       auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
-      B.buildExtract(MI.getOperand(0), WideLoad, 0);
+      if (WiderTy.isScalar())
+        B.buildTrunc(MI.getOperand(0), WideLoad);
+      else {
+        B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),
+                                            WideLoad);
+      }
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -12,14 +12,6 @@
 target triple = "aarch64--"
 
 ; BIG-ENDIAN: unable to translate in big endian mode
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<28 x s32>) = G_CONCAT_VECTORS %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>) (in function: odd_vector)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
-; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
-define void @odd_vector(<7 x i32>* %addr) {
-  %vec = load <7 x i32>, <7 x i32>* %addr
-  store <7 x i32> %vec, <7 x i32>* %addr
-  ret void
-}
 
 ; Make sure we don't mess up metadata arguments.
declare void @llvm.write_register.i64(metadata, i64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll @@ -58,9 +58,8 @@ ; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: RET_ReallyLR %call = call <3 x float> @bar(float undef) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -229,11 +229,10 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -244,35 +244,37 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR 
[[COPY]](s64), [[COPY1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64) ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]](<2 x s64>) ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR %3:_(s64) = COPY $d0 %4:_(s64) = COPY $d1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -229,11 +229,10 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -326,8 +326,13 @@ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BITCAST]](s128) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -74,17 +74,13 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32) - ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s32>) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -30,12 +30,14 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 - ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 - ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -62,12 +64,14 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 - ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 - ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -628,23 +628,25 @@ ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v6 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-NEXT: v_and_or_b32 v3, v3, v9, s4 -; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX9-NEXT: v_and_or_b32 v2, v4, v9, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX9-NEXT: v_and_or_b32 v3, v5, v9, s4 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v8 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v3, v4, v9, v3 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX9-NEXT: v_and_or_b32 v4, v5, v9, s4 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 -; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v4 -; GFX9-NEXT: v_pk_add_f16 v1, v3, v1 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-NEXT: v_pk_add_f16 v1, v4, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -657,18 +659,20 @@ ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 -; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v6 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v8 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v6 -; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 
v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs: @@ -680,23 +684,25 @@ ; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v6 ; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-DENORM-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-DENORM-NEXT: v_and_or_b32 v3, v3, v9, s4 -; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX9-DENORM-NEXT: v_and_or_b32 v2, v4, v9, v2 -; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX9-DENORM-NEXT: v_and_or_b32 v3, v5, v9, s4 -; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v8 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v3, v4, v9, v3 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX9-DENORM-NEXT: v_and_or_b32 v4, v5, v9, s4 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 -; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v4 -; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v3, v1 -; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v4, v1 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; @@ -709,18 +715,20 @@ ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6 -; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v6 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4 ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v8 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v6 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_3xhalf_add_mul_rhs: @@ -734,23 +742,25 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-NEXT: v_and_or_b32 v3, v3, v8, s4 ; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6 ; GFX10-NEXT: v_and_or_b32 v2, v2, v8, v7 +; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX10-NEXT: v_and_or_b32 v2, v3, 
v8, s4 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-NEXT: v_and_or_b32 v2, v4, v8, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6 +; GFX10-NEXT: v_pk_add_f16 v0, v2, v0 ; GFX10-NEXT: v_and_or_b32 v2, v5, v8, s4 -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v6 -; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_and_or_b32 v3, v4, v8, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_pk_add_f16 v1, v2, v1 -; GFX10-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs: @@ -758,23 +768,25 @@ ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v3, v3, v7, s4 -; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v5, v5, v7, s4 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v7, v6 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v7, v8 -; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v7, v9 -; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v7 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v8 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v3, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v5, v9, s4 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v3 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs: @@ -788,23 +800,25 @@ ; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v3, v3, v8, s4 ; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6 ; GFX10-DENORM-NEXT: v_and_or_b32 v2, v2, v8, v7 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX10-DENORM-NEXT: v_and_or_b32 v2, v3, v8, s4 -; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, 
v4 +; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 ; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-DENORM-NEXT: v_and_or_b32 v2, v4, v8, v2 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 ; GFX10-DENORM-NEXT: v_and_or_b32 v2, v5, v8, s4 -; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v5, 16, v6 -; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 -; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-DENORM-NEXT: v_and_or_b32 v3, v4, v8, v3 -; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v5 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v2, v1 -; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v3 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: @@ -812,23 +826,25 @@ ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v3, v3, v7, s4 -; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v5, v5, v7, s4 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v7, v6 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v7, v8 -; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v7, v9 -; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v7 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v8 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v3, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v5, v9, s4 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v3 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul <3 x half> %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -888,11 +888,12 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef @@ -942,12 +943,13 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -14,9 +14,9 @@ ; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 ; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 ; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 +; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 ; GCN-NEXT: global_load_dwordx4 
v[60:63], v64, s[0:1] offset:112 -; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 @@ -38,8 +38,6 @@ ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208 ; GCN-NEXT: s_waitcnt vmcnt(7) ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 ; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 @@ -48,6 +46,8 @@ ; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 ; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 ; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: s_endpgm ; ; GFX10-LABEL: v_insert_v64i32_37: @@ -58,13 +58,13 @@ ; GFX10-NEXT: s_clause 0xf ; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] ; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 ; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 ; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 @@ -84,8 +84,6 @@ ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:192 ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208 -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 @@ -94,6 +92,8 @@ ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -198,156 +198,3 @@ S_ENDPGM 0, implicit %1, implicit %2 ... 
- ---- -name: extract_sgpr_s32_from_v3s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_s32_from_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s32) = G_EXTRACT %0, 0 - %2:sgpr(s32) = G_EXTRACT %0, 32 - %3:sgpr(s32) = G_EXTRACT %0, 64 - S_ENDPGM 0, implicit %0, implicit %2, implicit %3 - -... - ---- -name: extract_sgpr_v2s32_from_v3s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_v2s32_from_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(<2 x s32>) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - ---- -name: extract_sgpr_v3s32_from_v4s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3 - ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<3 x s32>) = G_EXTRACT %0, 0 - %2:sgpr(<3 x s32>) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1, implicit %2 - -... - ---- -name: extract_sgpr_v2s16_from_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - ---- -name: extract_sgpr_v2s16_from_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... 
- -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v6s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s16) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -32,8 +32,7 @@ %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 - %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) %15:sgpr(p1) = G_INTTOPTR %13(s64) %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FCONSTANT double -0.000000e+00 @@ -82,8 +81,7 @@ %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 - %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) %15:sgpr(p1) = G_INTTOPTR %13(s64) %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FABS %18 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -566,42 +566,3 @@ %2:vgpr(s256) = G_INSERT %0, %1, 128 S_ENDPGM 0, implicit %2 ... - ---- -name: insert_sgpr_v2s16_to_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 - ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = COPY $sgpr2 - %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 - -... 
- ---- -name: insert_sgpr_v2s16_to_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = COPY $sgpr2 - %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s -# ERR: remark: :0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0) -# ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0) +# ERR: remark: :0:0: instruction is not legal: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0) +# ERR-NEXT: :0:0: instruction is not legal: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0) # ERR-NOT: remark # FIXME: This 16-bit insert source should not be legal and this test diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -2078,11 +2078,11 @@ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: 
   ; GCN-NEXT: S_ENDPGM 0
   %val = call <3 x i16> @external_v3i16_func_void()
   store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef
@@ -2252,11 +2252,11 @@
   ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
   ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
-  ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN-NEXT: S_ENDPGM 0
   %val = call <3 x half> @external_v3f16_func_void()
   store volatile <3 x half> %val, <3 x half> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -2101,11 +2101,12 @@
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+  ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
   ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2161,11 +2162,12 @@
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+  ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
   ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2339,12 +2341,13 @@
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
-  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>)
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16)
+  ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>)
   ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2400,13 +2403,14 @@
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>)
-  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>)
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF1]](s16)
+  ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>)
   ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2462,44 +2466,45 @@
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>)
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16)
+  ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
   ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
-  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
+  ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>)
   ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2555,47 +2560,48 @@
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>)
+  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16)
+  ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
   ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT: G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
-  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
-  ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
+  ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>)
+  ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>)
   ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -1230,12 +1230,12 @@
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
   ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
   store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
@@ -1267,12 +1267,12 @@
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
   ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
   ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]]
   store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
@@ -1363,12 +1363,12 @@
   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5)
   ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5)
-  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16)
   ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
   ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
   ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]]
   store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
@@ -1680,12 +1680,12 @@
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
   ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
   store <3 x half> %arg0, <3 x half> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -1383,9 +1383,9 @@
   ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
+  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
   ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -1396,11 +1396,12 @@
   ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
   ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
   ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
-  ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>)
-  ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
-  ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>)
-  ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>)
+  ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>)
+  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16)
+  ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
+  ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
+  ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
   ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
   ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
   ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
 name: test_and_s32
@@ -302,18 +302,14 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]]
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[AND1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
    %2:_(<3 x s32>) = G_AND %0, %1
@@ -350,23 +346,20 @@
    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
-    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
-    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
-    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>)
-    ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]]
+    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
    %0:_(<5 x s32>) = G_IMPLICIT_DEF
    %1:_(<5 x s32>) = G_IMPLICIT_DEF
    %2:_(<5 x s32>) = G_AND %0, %1
@@ -421,40 +414,65 @@
    ; CHECK-LABEL: name: test_and_v3s16
    ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]]
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
-    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]]
-    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL2]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]]
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+    ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+    ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
    %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
    %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
    %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
@@ -490,54 +508,102 @@
 
    ; CHECK-LABEL: name: test_and_v5s16
    ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
-    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
-    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]]
-    ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]]
-    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
-    ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
-    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
-    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]]
-    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
-    ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+    ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]]
+    ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>)
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL6]]
+    ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]]
+    ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>)
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND11]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32)
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+    ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
+    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32)
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+    ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
+    ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]]
+    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL10]]
+    ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]]
+    ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]]
+    ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32)
+    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]]
+    ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
    %0:_(<5 x s16>) = G_IMPLICIT_DEF
    %1:_(<5 x s16>) = G_IMPLICIT_DEF
    %2:_(<5 x s16>) = G_AND %0, %1
@@ -552,34 +618,15 @@
   bb.0:
 
    ; CHECK-LABEL: name: test_and_v3s8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
-    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32)
-    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
-    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32)
-    ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
-    ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]]
-    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8)
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>)
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
-    ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]],
[[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -114,10 +114,7 @@ ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s --- name: test_ashr_s32_s32 @@ -532,43 +532,40 @@ ; SI-LABEL: name: test_ashr_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR 
[[UV1]], [[UV4]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_ashr_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32 ; GFX9PLUS: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; 
GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -798,32 +795,33 @@ ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9PLUS: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9PLUS-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9PLUS-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9PLUS-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9PLUS-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -511,10 
+511,9 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] @@ -549,19 +548,18 @@ ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -602,14 +600,13 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 28 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] @@ -660,31 +657,30 @@ ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[TRUNC15]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -757,12 +753,11 @@ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -858,6 +853,10 @@ ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -872,15 +871,18 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = 
G_AND [[UV13]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) @@ -1106,6 +1108,22 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR3]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] @@ -1135,15 +1153,14 @@ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK-NEXT: 
[[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) @@ -1367,25 +1384,24 @@ ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -1433,15 +1449,14 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) @@ -1562,30 +1577,38 @@ ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC 
[[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -1598,29 +1621,36 @@ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV24]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV25]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY 
[[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]] + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV26]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV27]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV28]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV29]](s32) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C]] + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C]] ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV30]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV31]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) @@ -1745,6 +1775,10 @@ ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1759,37 +1793,66 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV20]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV21]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV22]], [[C]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV23]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[C]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC 
[[UV8]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[C]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[C]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV60]], [[C]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV61]], [[C]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV62]], [[C]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV63]], [[C]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) @@ -1885,61 +1948,124 @@ ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV18]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV19]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV36]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV37]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV54]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV55]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV72]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV73]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) + ; 
CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV90]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV91]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV108]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV109]](s32) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: 
[[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C]] + ; CHECK-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV126]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV127]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8) + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8) + ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]] + ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -2210,21 +2336,20 @@ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST4]](s32) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) @@ -2253,20 +2378,19 @@ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST11]], [[C]](s32) ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) @@ -2388,35 +2512,50 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC4]], [[C1]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -9,8 +9,9 @@ liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_0_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -24,8 +25,9 @@ liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_1_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -39,8 +41,9 @@ liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_2_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -54,8 +57,9 @@ liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-LABEL: name: extract_vector_elt_0_v3i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; 
CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -69,8 +73,9 @@ liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: extract_vector_elt_0_v4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -205,8 +210,9 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -241,8 +247,9 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -258,8 +265,9 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_CONSTANT i1 false %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -301,11 +309,8 @@ ; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -328,9 +333,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1017,8 +1021,9 @@ ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1036,8 +1041,9 @@ ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 1 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1055,8 +1061,9 @@ ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 2 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1164,8 +1171,9 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<2 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1181,8 +1189,9 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v8i64 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<8 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) %0:_(<8 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1198,8 +1207,9 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v16i64 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<16 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) %0:_(<16 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1215,8 +1225,9 @@ liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_CONSTANT i64 0 %2:_(s32) = G_TRUNC %1 @@ -1234,8 +1245,9 @@ ; CHECK-LABEL: name: extract_vector_elt_7_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 224 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1255,8 +1267,9 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 32 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x 
s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1294,218 +1307,13 @@ ; CHECK-LABEL: name: extract_vector_elt_33_v64p3 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(p3), [[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), [[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST1]](<16 x p3>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST2]](<16 x p3>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), 
[[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST3]](<16 x p3>) - ; CHECK-NEXT: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store (p3) into %stack.0, align 256, addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store (p3) into %stack.0 + 4, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store (p3) into %stack.0 + 8, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store (p3) into %stack.0 + 12, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store (p3) into %stack.0 + 16, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK-NEXT: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store (p3) into %stack.0 + 20, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK-NEXT: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store (p3) into %stack.0 + 24, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK-NEXT: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store (p3) into %stack.0 + 28, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK-NEXT: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store (p3) into %stack.0 + 32, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK-NEXT: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store (p3) into %stack.0 + 36, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK-NEXT: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store (p3) into %stack.0 + 40, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK-NEXT: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store (p3) into %stack.0 + 44, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK-NEXT: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store (p3) into %stack.0 + 48, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: 
[[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK-NEXT: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store (p3) into %stack.0 + 52, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store (p3) into %stack.0 + 56, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store (p3) into %stack.0 + 60, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store (p3) into %stack.0 + 64, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store (p3) into %stack.0 + 68, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store (p3) into %stack.0 + 72, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK-NEXT: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store (p3) into %stack.0 + 76, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK-NEXT: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store (p3) into %stack.0 + 80, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK-NEXT: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store (p3) into %stack.0 + 84, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK-NEXT: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store (p3) into %stack.0 + 88, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK-NEXT: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store (p3) into %stack.0 + 92, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK-NEXT: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store (p3) into %stack.0 + 96, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK-NEXT: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store (p3) into %stack.0 + 100, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK-NEXT: 
[[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK-NEXT: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store (p3) into %stack.0 + 104, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK-NEXT: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store (p3) into %stack.0 + 108, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK-NEXT: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store (p3) into %stack.0 + 112, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK-NEXT: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store (p3) into %stack.0 + 116, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK-NEXT: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store (p3) into %stack.0 + 120, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store (p3) into %stack.0 + 124, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store (p3) into %stack.0 + 128, align 128, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK-NEXT: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store (p3) into %stack.0 + 132, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store (p3) into %stack.0 + 136, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store (p3) into %stack.0 + 140, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK-NEXT: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store (p3) into %stack.0 + 144, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK-NEXT: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store (p3) into %stack.0 + 148, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK-NEXT: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store (p3) into %stack.0 + 152, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 156 - ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK-NEXT: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store (p3) into %stack.0 + 156, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK-NEXT: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store (p3) into %stack.0 + 160, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK-NEXT: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store (p3) into %stack.0 + 164, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK-NEXT: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store (p3) into %stack.0 + 168, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK-NEXT: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store (p3) into %stack.0 + 172, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK-NEXT: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store (p3) into %stack.0 + 176, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK-NEXT: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store (p3) into %stack.0 + 180, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK-NEXT: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store (p3) into %stack.0 + 184, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK-NEXT: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store (p3) into %stack.0 + 188, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK-NEXT: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store (p3) into %stack.0 + 192, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK-NEXT: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store (p3) into %stack.0 + 196, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK-NEXT: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store (p3) into %stack.0 + 200, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK-NEXT: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store (p3) into %stack.0 + 204, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK-NEXT: 
[[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK-NEXT: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store (p3) into %stack.0 + 208, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK-NEXT: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store (p3) into %stack.0 + 212, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK-NEXT: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store (p3) into %stack.0 + 216, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK-NEXT: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store (p3) into %stack.0 + 220, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK-NEXT: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store (p3) into %stack.0 + 224, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK-NEXT: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store (p3) into %stack.0 + 228, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK-NEXT: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store (p3) into %stack.0 + 232, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK-NEXT: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store (p3) into %stack.0 + 236, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK-NEXT: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store (p3) into %stack.0 + 240, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK-NEXT: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store (p3) into %stack.0 + 244, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK-NEXT: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store (p3) into %stack.0 + 248, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK-NEXT: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store (p3) into %stack.0 + 252, basealign 256, addrspace 5) - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load (p3) from %stack.0 + 132, addrspace 5) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LOAD4]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x 
p3>), align 4, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -187,8 +187,9 @@
 ; CHECK-LABEL: name: test_extract_s32_v2s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(s32) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
@@ -201,8 +202,9 @@
 ; CHECK-LABEL: name: test_extract_s32_v2s32_offset32
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(s32) = G_EXTRACT %0, 32
 $vgpr0 = COPY %1
@@ -215,8 +217,9 @@
 ; CHECK-LABEL: name: test_extract_s32_v3s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(s32) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
@@ -229,8 +232,9 @@
 ; CHECK-LABEL: name: test_extract_s32_v3s32_offset32
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(s32) = G_EXTRACT %0, 32
 $vgpr0 = COPY %1
@@ -243,8 +247,9 @@
 ; CHECK-LABEL: name: test_extract_s32_v3s32_offset64
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(s32) = G_EXTRACT %0, 64
 $vgpr0 = COPY %1
@@ -257,8 +262,9 @@
 ; CHECK-LABEL: name: test_extract_s32_v4s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(s32) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
@@ -313,8 +319,9 @@
 ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<2 x s32>) = G_EXTRACT %0, 0
 $vgpr0_vgpr1 = COPY %1
@@ -328,8 +335,9 @@
 ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset32
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<2 x s32>) = G_EXTRACT %0, 32
 $vgpr0_vgpr1 = COPY %1
@@ -343,8 +351,9 @@
 ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset64
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<2 x s32>) = G_EXTRACT %0, 64
 $vgpr0_vgpr1 = COPY %1
@@ -357,8 +366,9 @@
 ; CHECK-LABEL: name: test_extract_s64_v4s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(s64) = G_EXTRACT %0, 0
 $vgpr0_vgpr1 = COPY %1
@@ -372,8 +382,9 @@
 ; CHECK-LABEL: name: test_extract_s64_v4s32_offset32
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(s64) = G_EXTRACT %0, 32
 $vgpr0_vgpr1 = COPY %1
@@ -387,8 +398,9 @@
 ; CHECK-LABEL: name: test_extract_s64_v4s32_offset64
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(s64) = G_EXTRACT %0, 64
 $vgpr0_vgpr1 = COPY %1
@@ -401,8 +413,9 @@
 ; CHECK-LABEL: name: test_extract_p0_v4s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(p0) = G_EXTRACT %0, 0
 $vgpr0_vgpr1 = COPY %1
@@ -416,8 +429,9 @@
 ; CHECK-LABEL: name: test_extract_p0_v4s32_offset32
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(p0) = G_EXTRACT %0, 32
 $vgpr0_vgpr1 = COPY %1
@@ -431,8 +445,9 @@
 ; CHECK-LABEL: name: test_extract_p0_v4s32_offset64
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(p0) = G_EXTRACT %0, 64
 $vgpr0_vgpr1 = COPY %1
@@ -444,13 +459,8 @@
 bb.0:
 ; CHECK-LABEL: name: extract_s8_v4s8_offset0
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 %0:_(<4 x s8>) = G_IMPLICIT_DEF
 %1:_(s8) = G_EXTRACT %0, 0
 %2:_(s32) = G_ANYEXT %1
@@ -463,13 +473,8 @@
 bb.0:
 ; CHECK-LABEL: name: extract_s8_v4s8_offset8
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32)
 %0:_(<4 x s8>) = G_IMPLICIT_DEF
 %1:_(s8) = G_EXTRACT %0, 8
 %2:_(s32) = G_ANYEXT %1
@@ -482,13 +487,8 @@
 bb.0:
 ; CHECK-LABEL: name: extract_s8_v4s8_offset16
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
 %0:_(<4 x s8>) = G_IMPLICIT_DEF
 %1:_(s8) = G_EXTRACT %0, 16
 %2:_(s32) = G_ANYEXT %1
@@ -501,13 +501,8 @@
 bb.0:
 ; CHECK-LABEL: name: extract_s8_v4s8_offset24
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32)
 %0:_(<4 x s8>) = G_IMPLICIT_DEF
 %1:_(s8) = G_EXTRACT %0, 24
 %2:_(s32) = G_ANYEXT %1
@@ -522,26 +517,7 @@
 ; CHECK-LABEL: name: extract_s8_v3s8_offset16
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
 %0:_(<3 x s8>) = G_IMPLICIT_DEF
 %1:_(s8) = G_EXTRACT %0, 16
 %2:_(s32) = G_ANYEXT %1
@@ -555,21 +531,7 @@
 ; CHECK-LABEL: name: extract_s8_v5s1_offset4
 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF1]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF2]](<2 x s16>), [[DEF2]](<2 x s16>)
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF3]], [[UV5]](<5 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32)
 %0:_(<5 x s1>) = G_IMPLICIT_DEF
 %1:_(s1) = G_EXTRACT %0, 4
 %2:_(s32) = G_ANYEXT %1
@@ -582,8 +544,17 @@
 bb.0:
 ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
 %0:_(<4 x s16>) = G_IMPLICIT_DEF
 %1:_(<2 x s16>) = G_EXTRACT %0, 32
 $vgpr0 = COPY %1
@@ -595,8 +566,17 @@
 bb.0:
 ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32
 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
 %0:_(<6 x s16>) = G_IMPLICIT_DEF
 %1:_(<2 x s16>) = G_EXTRACT %0, 32
 $vgpr0 = COPY %1
@@ -823,13 +803,9 @@
 ; CHECK-LABEL: name: extract_s16_v3s16_offset0
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
 %0:_(<3 x s16>) = G_IMPLICIT_DEF
 %1:_(s16) = G_EXTRACT %0, 0
 %2:_(s32) = G_ANYEXT %1
@@ -908,12 +884,17 @@
 ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0
 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
 %0:_(<3 x s16>) = G_IMPLICIT_DEF
 %1:_(<2 x s16>) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
@@ -926,12 +907,17 @@
 ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0
 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
 %0:_(<5 x s16>) = G_IMPLICIT_DEF
 %1:_(<2 x s16>) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -231,25 +231,19 @@
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
 ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
 ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
 ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
 ; VI-LABEL: name: test_fabs_v3s16
 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -274,25 +268,19 @@
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
 ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
 ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
 ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-LABEL: name: test_fabs_v3s16
 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -309,12 +297,9 @@
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -446,21 +446,21 @@
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
 ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1382,7 +1382,6 @@
 ; SI-LABEL: name: test_fdiv_v3s16
 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1391,6 +1390,7 @@
 ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1451,7 +1451,6 @@
 ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 ; VI-LABEL: name: test_fdiv_v3s16
 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1460,6 +1459,7 @@
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1492,7 +1492,6 @@
 ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-LABEL: name: test_fdiv_v3s16
 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1501,6 +1500,7 @@
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1533,7 +1533,6 @@
 ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16
 ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1542,6 +1541,7 @@
 ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1562,7 +1562,6 @@
 ; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-LABEL: name: test_fdiv_v3s16
 ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1571,6 +1570,7 @@
 ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -518,27 +518,27 @@
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>)
 ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32)
 ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]]
 ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
 ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
 ---
 name: test_fmaxnum_s32_ieee_mode_on
@@ -496,32 +496,38 @@
 ; GFX9-LABEL: name: test_fmaxnum_v3s16
 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]]
- ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]]
- ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
 ---
 name: test_fminnum_s32_ieee_mode_on
@@ -496,32 +496,38 @@
 ; GFX9-LABEL: name: test_fminnum_v3s16
 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]]
- ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]]
- ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
 ---
 name: test_fmul_s32
@@ -433,21 +433,21 @@
 ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
 ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
 ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
 ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
 ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
 ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
 ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
- ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>)
 ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
 ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
 ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -223,26 +223,9 @@
 ; CHECK-LABEL: name: test_freeze_s1056
 ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
- ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV32]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
- ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]]
- ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV2]](s33792)
- ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1056)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1056) = G_ANYEXT [[COPY]](s512)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1056) = G_FREEZE [[ANYEXT]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1056)
 %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
 %1:_(s1056) = G_ANYEXT %0
 %2:_(s1056) = G_FREEZE %1
@@ -417,19 +400,18 @@
 ; CHECK-LABEL: name: test_freeze_v33s32
 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
 ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]]
 ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(<33 x s32>), [[UV49:%[0-9]+]]:_(<33 x s32>), [[UV50:%[0-9]+]]:_(<33 x s32>), [[UV51:%[0-9]+]]:_(<33 x s32>), [[UV52:%[0-9]+]]:_(<33 x s32>), [[UV53:%[0-9]+]]:_(<33 x s32>), [[UV54:%[0-9]+]]:_(<33 x s32>), [[UV55:%[0-9]+]]:_(<33 x s32>), [[UV56:%[0-9]+]]:_(<33 x s32>), [[UV57:%[0-9]+]]:_(<33 x s32>), [[UV58:%[0-9]+]]:_(<33 x s32>), [[UV59:%[0-9]+]]:_(<33 x s32>), [[UV60:%[0-9]+]]:_(<33 x s32>), [[UV61:%[0-9]+]]:_(<33 x s32>), [[UV62:%[0-9]+]]:_(<33 x s32>), [[UV63:%[0-9]+]]:_(<33 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<528 x s32>)
- ; CHECK-NEXT: S_NOP 0, implicit [[UV48]](<33 x s32>)
+ ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
+ ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>)
+ ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[UV33]](s32), [[UV34]](s32), [[UV35]](s32), [[UV36]](s32), [[UV37]](s32), [[UV38]](s32), [[UV39]](s32), [[UV40]](s32), [[UV41]](s32), [[UV42]](s32), [[UV43]](s32), [[UV44]](s32), [[UV45]](s32), [[UV46]](s32), [[UV47]](s32), [[UV48]](s32), [[UV49]](s32), [[UV50]](s32), [[UV51]](s32), [[UV52]](s32), [[UV53]](s32), [[UV54]](s32), [[UV55]](s32), [[UV56]](s32), [[UV57]](s32), [[UV58]](s32), [[UV59]](s32), [[UV60]](s32), [[UV61]](s32), [[UV62]](s32), [[UV63]](s32), [[FREEZE2]](s32)
+ ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<33 x s32>)
 %0:_(<33 x s32>) = G_IMPLICIT_DEF
 %1:_(<33 x s32>) = G_FREEZE %0
 S_NOP 0, implicit %1
@@ -530,16 +512,13 @@
 ; CHECK-LABEL: name: test_freeze_v3s8
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s8>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<3 x s8>) = G_TRUNC %0
 %2:_(<3 x s8>) = G_FREEZE %1
@@ -569,24 +548,26 @@
 ; CHECK-LABEL: name: test_freeze_v3s16
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]]
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[CONCAT_VECTORS]]
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<3 x s16>) = G_TRUNC %0
 %2:_(<3 x s16>) = G_FREEZE %1
@@ -616,28 +597,33 @@
 ; CHECK-LABEL: name: test_freeze_v5s16
 ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]] - ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32), [[BITCAST2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR3]](<5 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x 
s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST3]](s32), [[LSHR]](s32), [[BITCAST4]](s32), [[LSHR1]](s32), [[BITCAST5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR]](<5 x s32>) %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:_(<5 x s16>) = G_TRUNC %0 %2:_(<5 x s16>) = G_FREEZE %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -798,19 +798,19 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 @@ -845,17 +845,20 @@ ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: 
[[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -674,8 +674,7 @@ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 @@ -701,22 +700,22 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY6]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY9]](s32) + ; SI-NEXT: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL6]] + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL6]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -730,9 +729,9 @@ ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY11]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32) @@ -745,116 +744,76 @@ ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY13]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY12]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL9]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT10]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY14]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: 
[[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT11]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[DEF]], [[ZEXT12]](s32) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[C2]], [[COPY15]](s32) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[ZEXT13]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY16]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL11]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C4]] + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C5]] + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[COPY17]](s32) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[SHL14]] - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C1]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = 
G_LSHR [[AND25]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[ZEXT11]](s32) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C4]] - ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C5]] - ; SI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND26]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR8]](s16) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[SHL12]], [[C1]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY21]](s32) - ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND27]](s16) - ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]] - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[ZEXT15]](s32) - ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C4]] - ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C5]] - ; SI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND30]](s16) - ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR9]](s16) - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND32:%[0-9]+]]:_(s32) = G_AND [[SHL13]], [[C1]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY22]](s32) - ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND31]](s16) - ; SI-NEXT: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[ZEXT17]](s32) - ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] - ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]] - ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: 
[[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; SI-NEXT: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]] - ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; SI-NEXT: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; SI-NEXT: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32) - ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]] - ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL13]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C]](s32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL14]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] + ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]] + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL15]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>) ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -869,7 +828,6 @@ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -932,75 +890,52 @@ ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16) ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], 
[[AND11]](s16) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR13]] - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[AND13]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[DEF]], [[C3]](s16) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR15]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[C3]](s16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR13]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL14]] - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL11]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND13]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C3]](s16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL12]], [[LSHR15]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] - ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] - ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[OR8]], [[AND15]](s16) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[SHL12]], [[C3]](s16) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[LSHR17]], [[AND16]](s16) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[SHL15]], [[LSHR18]] - ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C4]] - ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C5]] - ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[OR9]], [[AND17]](s16) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR 
[[SHL13]], [[C3]](s16) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[LSHR19]], [[AND18]](s16) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[SHL16]], [[LSHR20]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]] - ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]] - ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]] - ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] + ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]] + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL15]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1012,19 +947,19 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + 
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 @@ -1059,17 +994,20 @@ ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) 
= G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -327,11 +327,11 @@ ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 %34:_(p1) = COPY $vgpr0_vgpr1 @@ -428,11 +428,28 @@ ; CHECK-LABEL: name: test_implicit_def_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: 
[[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF %2:_(<4 x s16>) = G_INSERT %1, %0, 0 @@ -458,12 +475,42 @@ ; CHECK-LABEL: name: test_implicit_def_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; 
CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 @@ -478,8 +525,40 @@ ; CHECK-LABEL: name: test_implicit_def_v6s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[DEF]](<6 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = 
G_AND [[BITCAST3]], [[C1]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
     %0:_(<6 x s16>) = G_IMPLICIT_DEF
     %1:_(<8 x s16>) = G_IMPLICIT_DEF
     %2:_(<8 x s16>) = G_INSERT %1, %0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -10,8 +10,9 @@
     ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 0
@@ -28,8 +29,9 @@
     ; CHECK-LABEL: name: insert_vector_elt_1_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 1
@@ -107,8 +109,9 @@
     ; CHECK-LABEL: name: insert_vector_elt_0_v16s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<16 x s64>) = G_INSERT [[DEF]], [[COPY]](s64), 0
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT]](<16 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(<16 x s64>) = G_IMPLICIT_DEF
     %2:_(s32) = G_CONSTANT i32 0
@@ -126,8 +129,9 @@
     ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s8) = G_CONSTANT i8 0
@@ -145,8 +149,9 @@
     ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[DEF]], [[COPY]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(<2 x s8>) = G_IMPLICIT_DEF
@@ -166,8 +171,9 @@
     ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s64) = G_CONSTANT i64 0
@@ -305,55 +311,62 @@
     ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>)
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>)
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[C3]](s32), [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>)
-    ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(<4 x s32>), [[UV33:%[0-9]+]]:_(<4 x s32>), [[UV34:%[0-9]+]]:_(<4 x s32>), [[UV35:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
+    ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(<4 x s32>), [[UV37:%[0-9]+]]:_(<4 x s32>), [[UV38:%[0-9]+]]:_(<4 x s32>), [[UV39:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
+    ; CHECK-NEXT: G_STORE [[UV32]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
-    ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV33]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
     ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
-    ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV34]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
     ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
     ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
-    ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV35]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
     ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV36]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
     ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
     ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
-    ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV37]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
     ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
     ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
-    ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV38]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
     ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
     ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
-    ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[UV39]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
     ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
     ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
     ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
-    ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
     ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
     ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
-    ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
     ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
     ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
-    ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
     ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
     ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
     ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
-    ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
     ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
     ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
-    ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
     ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
     ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64)
-    ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_CONSTANT i32 33
     %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
@@ -409,8 +409,9 @@
     ; CHECK-LABEL: name: test_insert_v2s32_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<2 x s32>) = G_INSERT %0, %1, 0
@@ -425,8 +426,9 @@
     ; CHECK-LABEL: name: test_insert_v2s32_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<2 x s32>) = G_INSERT %0, %1, 32
@@ -441,8 +443,9 @@
     ; CHECK-LABEL: name: test_insert_v3s32_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(<3 x s32>) = G_INSERT %0, %1, 0
@@ -457,8 +460,9 @@
     ; CHECK-LABEL: name: test_insert_v3s32_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(<3 x s32>) = G_INSERT %0, %1, 32
@@ -473,8 +477,9 @@
     ; CHECK-LABEL: name: test_insert_v3s32_s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(<3 x s32>) = G_INSERT %0, %1, 64
@@ -489,8 +494,9 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -505,8 +511,9 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -521,8 +528,9 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -537,8 +545,9 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset96
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 96
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 96
@@ -553,8 +562,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s64_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -569,8 +580,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s64_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -585,8 +598,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s64_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 64
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -601,8 +616,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s96_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -617,8 +634,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_s96_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -633,8 +652,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -649,8 +670,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -665,8 +688,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 64
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -681,8 +706,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -697,8 +724,10 @@
     ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -763,11 +792,13 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
@@ -813,11 +844,11 @@
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
@@ -834,16 +865,29 @@
     ; CHECK-LABEL: name: test_insert_v3s16_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -861,16 +905,28 @@
     ; CHECK-LABEL: name: test_insert_v3s16_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -888,16 +944,28 @@
     ; CHECK-LABEL: name: test_insert_v3s16_s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -915,15 +983,29 @@
     ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<2 x s16>) = COPY $vgpr2
@@ -940,15 +1022,29 @@
     ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<2 x s16>) = COPY $vgpr2
@@ -965,15 +1061,28 @@
     ; CHECK-LABEL: name: test_insert_v3s16_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -990,15 +1099,28 @@
     ; CHECK-LABEL: name: test_insert_v3s16_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -1016,9 +1138,25 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1034,9 +1172,24 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1052,9 +1205,25 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1070,9 +1239,24 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset48
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 48
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1088,8 +1272,25 @@
     ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 0
@@ -1104,8 +1305,26 @@
     ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 16
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 16
@@ -1120,8 +1339,25 @@
     ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 32
@@ -1136,9 +1372,27 @@
     ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<3 x s16>) = G_EXTRACT %1, 0
@@ -1154,9 +1408,26 @@
     ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 16
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<3 x s16>) = G_EXTRACT %1, 0
@@ -1172,8 +1443,24 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 0
@@ -1188,8 +1475,25 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 16
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 16
@@ -1204,8 +1508,24 @@
     ; CHECK-LABEL: name: test_insert_v4s16_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -117,23 +117,20 @@ ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): @@ -153,19 +150,23 @@ ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -363,23 +364,20 @@ ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: 
[[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): @@ -402,20 +400,24 @@ ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) - ; PACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -607,23 +609,19 @@ ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): @@ -642,19 +640,20 @@ ; PACKED-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -679,22 +678,17 @@ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): @@ -713,19 +707,20 @@ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -740,27 +735,24 @@ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -769,27 +761,24 @@ ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -1187,23 +1176,19 @@ ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): @@ -1225,20 +1210,21 @@ ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: 
(dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -1269,22 +1255,17 @@ ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; 
UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): @@ -1306,20 +1287,21 @@ ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } 
@llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -1350,22 +1332,17 @@ ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -1387,20 +1364,21 @@ ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; 
PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX81 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX81 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { ; UNPACKED-LABEL: name: image_store_f16 @@ -195,14 +195,12 @@ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x 
s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v3f16 @@ -222,29 +220,27 @@ ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX81-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): @@ -263,19 +259,16 @@ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x 
s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): @@ -294,19 +287,16 @@ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -30,10 +30,9 @@ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: 
(dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -51,10 +50,9 @@ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 @@ -73,10 +71,9 @@ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 @@ -95,10 +92,9 @@ ; GCN: 
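The s.buffer.load checks above show the same uneven-split handling on the result side: rather than padding the loaded <4 x s32> with implicit-def vectors, concatenating to <12 x s32> and unmerging <3 x s32> pieces, the load result is unmerged to scalars once and the three live lanes are rebuilt directly (the <12 x s8> case further down likewise feeds its repacking shifts straight from the load's own unmerge results). A minimal sketch, with %rsrc, %zero and the other names assumed rather than taken from the test:
  %ld:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %rsrc(<4 x s32>), %zero(s32), 0 :: (dereferenceable invariant load (s96), align 4)
  %e0:_(s32), %e1:_(s32), %e2:_(s32), %e3:_(s32) = G_UNMERGE_VALUES %ld(<4 x s32>)
  ; %e3 is the padding lane and is simply dropped
  %res:_(<3 x s32>) = G_BUILD_VECTOR %e0(s32), %e1(s32), %e2(s32)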
[[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<6 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -116,10 +112,9 @@ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -137,24 +132,21 @@ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) + ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) - ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) + ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) + ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) + ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]] + ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]] ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] @@ -164,7 +156,7 @@ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]] + ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]] ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] @@ -174,7 +166,7 @@ ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]] + ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]] ; GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] @@ -184,8 +176,8 @@ ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x 
s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -204,10 +196,9 @@ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -1160,16 +1160,10 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-NEXT: 
[[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_constant_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1179,16 +1173,10 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_constant_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1198,16 +1186,10 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), 
[[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4) @@ -3364,12 +3346,12 @@ ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3395,12 +3377,12 @@ ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3426,12 +3408,12 @@ ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 
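For the s224 loads above, the three partial <3 x s32> build_vectors, the <21 x s32> concat with undef padding, and the <7 x s32> unmerge all collapse into a single <7 x s32> G_BUILD_VECTOR over the seven loaded lanes, which is then bitcast to s224 as before. A sketch assuming the two loads are named %ld0 and %ld1:
  %a0:_(s32), %a1:_(s32), %a2:_(s32), %a3:_(s32) = G_UNMERGE_VALUES %ld0(<4 x s32>)
  %b0:_(s32), %b1:_(s32), %b2:_(s32) = G_UNMERGE_VALUES %ld1(<3 x s32>)
  ; all seven defined lanes gathered in one step
  %v7:_(<7 x s32>) = G_BUILD_VECTOR %a0(s32), %a1(s32), %a2(s32), %a3(s32), %b0(s32), %b1(s32), %b2(s32)
  %val:_(s224) = G_BITCAST %v7(<7 x s32>)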
x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -4935,24 +4917,27 @@ ; CI-LABEL: name: test_load_constant_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -4973,10 +4958,10 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; 
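In the v3s16 align-8 load checks the emitted sequence is otherwise identical; the only difference is placement of the padding value, which is now created at its first use rather than ahead of the load's unmerge. A two-line sketch of the new ordering (%uv1 and %pad are illustrative names):
  %b1:_(s32) = G_BITCAST %uv1(<2 x s16>)
  ; the <4 x s16> undef supplying the high half now appears here, immediately before its own unmerge
  %pad:_(<4 x s16>) = G_IMPLICIT_DEF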
CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) @@ -4984,10 +4969,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) @@ -4995,10 +4980,10 @@ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4) %2:_(<4 x s64>) = 
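The v3s64 loads above now express the widening to <4 x s64> without G_EXTRACT/G_INSERT: both the loaded value and a <4 x s64> undef are unmerged, and a single G_BUILD_VECTOR gathers the three defined lanes plus one undef lane. A minimal sketch of the align-32 case, with assumed names:
  %d0:_(s64), %d1:_(s64), %d2:_(s64), %d3:_(s64) = G_UNMERGE_VALUES %ld(<4 x s64>)
  %u:_(<4 x s64>) = G_IMPLICIT_DEF
  %u0:_(s64), %u1:_(s64), %u2:_(s64), %u3:_(s64) = G_UNMERGE_VALUES %u(<4 x s64>)
  ; lanes 0-2 are the loaded values, lane 3 is undef
  %res:_(<4 x s64>) = G_BUILD_VECTOR %d0(s64), %d1(s64), %d2(s64), %u3(s64)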
G_IMPLICIT_DEF @@ -5119,10 +5104,10 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) @@ -5230,10 +5215,10 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) @@ -5341,10 +5326,10 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -1010,16 +1010,10 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1029,16 +1023,10 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: 
[[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_flat_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1048,16 +1036,10 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0) @@ -3510,12 +3492,12 @@ ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; 
CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3541,12 +3523,12 @@ ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3572,12 +3554,12 @@ ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -4944,10 +4926,10 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) @@ -4955,10 +4937,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) @@ -4966,10 +4948,10 @@ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -4990,10 +4972,10 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) @@ -5001,10 +4983,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) @@ -5012,10 +4994,10 @@ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5136,10 +5118,10 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) @@ -5247,10 +5229,10 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = 
G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) @@ -5358,10 +5340,10 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -1203,8 +1203,9 @@ ; SI-LABEL: name: test_load_global_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1775,16 +1776,10 @@ ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32) - ; SI-NEXT: 
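test_load_global_s96_align16 above shows the scalar-typed variant of the same rewrite: the former G_EXTRACT of <3 x s32> from the wide load becomes an unmerge plus G_BUILD_VECTOR feeding the s96 bitcast, and the nearby SI s224 case assembles its <7 x s32> from the lanes of three separate loads in source order. A sketch with assumed names:
  %e0:_(s32), %e1:_(s32), %e2:_(s32), %e3:_(s32) = G_UNMERGE_VALUES %ld(<4 x s32>)
  %v3:_(<3 x s32>) = G_BUILD_VECTOR %e0(s32), %e1(s32), %e2(s32)
  %val:_(s96) = G_BITCAST %v3(<3 x s32>)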
[[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32)
- ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), [[DEF]](s32)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>)
- ; SI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
 ; CI-HSA-LABEL: name: test_load_global_s224_align4
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1794,16 +1789,10 @@
 ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
 ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; CI-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; CI-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; CI-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; CI-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
 ; CI-MESA-LABEL: name: test_load_global_s224_align4
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1813,16 +1802,10 @@
 ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
 ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; CI-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; CI-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; CI-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
 ; VI-LABEL: name: test_load_global_s224_align4
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1832,16 +1815,10 @@
 ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
 ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
 ; GFX9-HSA-LABEL: name: test_load_global_s224_align4
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1851,16 +1828,10 @@
 ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
 ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
 ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; GFX9-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; GFX9-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
 ; GFX9-MESA-LABEL: name: test_load_global_s224_align4
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1870,16 +1841,10 @@
 ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
 ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
 ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; GFX9-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; GFX9-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1)
@@ -5837,12 +5802,12 @@
 ; SI-LABEL: name: test_load_global_v3s16_align8
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5868,12 +5833,12 @@
 ; CI-HSA-LABEL: name: test_load_global_v3s16_align8
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5899,12 +5864,12 @@
 ; CI-MESA-LABEL: name: test_load_global_v3s16_align8
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5930,12 +5895,12 @@
 ; VI-LABEL: name: test_load_global_v3s16_align8
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5961,12 +5926,12 @@
 ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
 ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5979,12 +5944,12 @@
 ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
 ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -6996,7 +6961,6 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7004,6 +6968,7 @@
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7029,7 +6994,6 @@
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7037,6 +7001,7 @@
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7062,7 +7027,6 @@
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7070,6 +7034,7 @@
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7095,7 +7060,6 @@
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7103,6 +7067,7 @@
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7128,7 +7093,6 @@
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7136,6 +7100,7 @@
 ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -7148,7 +7113,6 @@
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7156,6 +7120,7 @@
 ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -7233,21 +7198,21 @@
 ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7271,21 +7236,21 @@
 ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7309,21 +7274,21 @@
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7446,21 +7411,21 @@
 ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7484,21 +7449,21 @@
 ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7522,21 +7487,21 @@
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7625,21 +7590,21 @@
 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7663,21 +7628,21 @@
 ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7701,21 +7666,21 @@
 ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7739,21 +7704,21 @@
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7864,21 +7829,21 @@
 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
+ ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]]
- ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
+ ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7902,21 +7867,21 @@
 ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7962,21 +7927,21 @@
 ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]]
- ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
+ ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]]
- ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8022,21 +7987,21 @@
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
+ ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]]
- ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
+ ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8132,8 +8097,9 @@
 ; SI-LABEL: name: test_load_global_v6s16_align16
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v6s16_align16
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8621,7 +8587,6 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8631,6 +8596,7 @@
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8662,7 +8628,6 @@
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8672,6 +8637,7 @@
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8703,7 +8669,6 @@
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8713,6 +8678,7 @@
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8744,7 +8710,6 @@
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8754,6 +8719,7 @@
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8785,7 +8751,6 @@
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8795,6 +8760,7 @@
 ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; GFX9-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -8809,7 +8775,6 @@
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8819,6 +8784,7 @@
 ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; GFX9-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -8871,26 +8837,26 @@
 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8921,26 +8887,26 @@
 ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
 ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
 ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8971,26 +8937,26 @@
 ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
 ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
 ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9021,26 +8987,26 @@
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; VI-NEXT:
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9153,26 +9119,26 @@ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9203,26 +9169,26 @@ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9253,26 +9219,26 @@ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9303,26 +9269,26 @@ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = 
G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9435,26 +9401,26 @@ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = 
G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9485,26 +9451,26 @@ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9535,26 +9501,26 @@ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; 
CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9585,26 +9551,26 @@ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], 
[[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9747,26 +9713,26 @@ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; SI-NEXT: 
[[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9797,26 +9763,26 @@ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9877,26 +9843,26 @@ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9957,26 +9923,26 @@ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR7]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -10504,8 +10470,9 @@ ; SI-LABEL: name: test_load_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) @@ -11453,45 +11420,51 @@ ; SI-LABEL: name: test_load_global_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) 
; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), 
[[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11512,10 +11485,10 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11523,10 +11496,10 @@ ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11534,10 +11507,10 @@ ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11545,10 +11518,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11556,10 +11529,10 @@ ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) 
= G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11567,10 +11540,10 @@ ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11691,10 +11664,10 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) @@ -11702,10 +11675,10 @@ ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -11813,10 +11786,10 @@ ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -11924,10 +11897,10 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; 
GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) @@ -11935,10 +11908,10 @@ ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -12046,10 +12019,10 @@ ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14947,17 +14920,18 @@ ; SI-LABEL: name: test_global_v2s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address 
+ 12, align 4, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -7733,12 +7733,12 @@ ; SI-LABEL: name: test_load_local_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7764,12 +7764,12 @@ ; CI-LABEL: name: test_load_local_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7795,12 +7795,12 @@ ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) 
= G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7826,12 +7826,12 @@ ; VI-LABEL: name: test_load_local_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7857,12 +7857,12 @@ ; GFX9-LABEL: name: test_load_local_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7875,12 +7875,12 @@ ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV2]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7893,12 +7893,12 @@ ; GFX10-LABEL: name: test_load_local_v3s16_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7911,12 +7911,12 @@ ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -11741,10 +11741,10 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) @@ -11754,10 +11754,10 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; 
CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11765,10 +11765,10 @@ ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11776,10 +11776,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD 
[[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11787,10 +11787,10 @@ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11798,10 +11798,10 @@ ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11809,10 +11809,10 @@ ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x 
s64>) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11820,10 +11820,10 @@ ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -7496,10 +7496,10 @@ ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_private_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7519,10 +7519,10 @@ ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; 
CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_private_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7542,10 +7542,10 @@ ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_private_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7565,10 +7565,10 @@ ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions 
have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_lshr_s32_s32 @@ -414,43 +414,40 @@ ; SI-LABEL: name: test_lshr_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_lshr_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), 
[[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = 
G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -593,17 +590,12 @@ ; SI-LABEL: name: test_lshr_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) @@ -617,29 +609,26 @@ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), 
[[BITCAST6]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -647,8 +636,7 @@ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -658,49 +646,52 @@ ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 - ; 
VI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) 
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -807,32 +798,33 @@ ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) 
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: 
test_or_s32 @@ -305,18 +305,14 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_OR %0, %1 @@ -353,23 +349,20 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_OR %0, %1 @@ -424,40 +417,65 @@ ; CHECK-LABEL: name: test_or_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; 
CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x 
s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -493,54 +511,102 @@ ; CHECK-LABEL: name: test_or_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] - ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), 
[[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: 
[[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR9]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_OR %0, %1 @@ -555,34 +621,15 @@ bb.0: ; CHECK-LABEL: name: test_or_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), 
[[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -127,77 +127,77 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK-NEXT: G_BR %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF2]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: 
%bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C5]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + 
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C7]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C7]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 @@ -252,10 +252,9 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] @@ -935,38 +934,39 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x 
s64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO1]] + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV4]](s64) - ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV10]], [[UV12]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV11]], [[UV13]], [[UADDO3]] + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV5]](s64) - ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV14]], [[UV16]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV15]], [[UV17]], [[UADDO5]] + ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[EXTRACT]](<3 x s64>), %bb.0, [[BUILD_VECTOR]](<3 x s64>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[PHI]](<3 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64) + ; CHECK-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -424,11 +424,9 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 @@ -443,7 +441,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] @@ -463,27 +461,27 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[BITCAST3]], [[C4]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -495,13 +493,11 @@ ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 @@ -528,27 +524,27 @@ ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -557,29 +553,27 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -260,35 +260,23 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 @@ -384,40 +372,65 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: 
[[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C2]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s32) = COPY $vgpr6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -705,10 +705,10 @@ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = 
G_BITCAST [[ASHR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -109,10 +109,7 @@ ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_shl_s32_s32 @@ -405,43 +405,40 @@ ; SI-LABEL: name: test_shl_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; SI-NEXT: 
[[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_shl_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_shl_v3s64_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -664,32 +661,33 @@ ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) 
= G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -35,9 +35,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT 
[[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -59,9 +61,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -83,9 +87,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -131,8 +137,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -154,9 +161,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY 
$vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -178,10 +186,13 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32), [[EXTRACT2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -203,9 +214,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -293,50 +306,29 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; CHECK-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR2]](<3 x s32>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST6]](s32), [[LSHR3]](s32), [[BITCAST7]](s32) - ; CHECK-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR3]](<3 x s32>), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], 
[[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -862,66 +862,48 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX8-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX8-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX8-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX8-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX8-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = 
G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - 
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT2]](s32), [[EXTRACT3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT4]](s32), [[EXTRACT5]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -948,12 +930,14 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] @@ -968,12 +952,14 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -997,12 +983,14 @@ ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -1016,12 +1004,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smax_s32 @@ -327,12 +327,12 @@ ; SI-LABEL: name: test_smax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -350,7 +350,6 @@ ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -359,6 +358,7 @@ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -376,23 +376,27 @@ ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smin_s32 @@ -327,12 +327,12 @@ ; SI-LABEL: name: test_smin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -350,7 +350,6 @@ ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -359,6 +358,7 @@ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -376,23 +376,27 @@ ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; 
GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -479,11 +479,9 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) @@ -497,7 +495,7 @@ ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) @@ -516,26 +514,26 @@ ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX6-NEXT: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -547,13 +545,11 @@ ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) @@ -577,27 +573,27 @@ ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -609,13 +605,11 @@ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) @@ -639,16 +633,16 @@ ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; 
GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST4]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -424,11 +424,9 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 @@ -443,7 +441,7 @@ ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] @@ -463,27 +461,27 @@ ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + 
; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -495,13 +493,11 @@ ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 @@ -528,27 +524,27 @@ ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -557,29 +553,27 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - 
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -1,16 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' %s 2> %t.err -o - | FileCheck -check-prefix=SI %s -# RUN: FileCheck -check-prefix=ERR %s < %t.err -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 
-run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' 2> %t.err %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: FileCheck -check-prefix=ERR %s < %t.err -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s - - -# ERR-NOT: remark -# ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<132 x s16>) = G_CONCAT_VECTORS {{.*}} (in function: test_store_global_v11s16_align4) -# ERR-NOT: remark +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_store_global_s1_align1 @@ -2388,9 +2381,7 @@ ; SI-LABEL: name: test_store_global_v3s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -2429,7 +2420,7 @@ ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -2523,9 +2514,7 @@ ; SI-LABEL: name: test_store_global_v3s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -2542,7 +2531,7 @@ ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store 
(s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) @@ -2594,12 +2583,12 @@ ; SI-LABEL: name: test_store_global_v3s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2626,12 +2615,12 @@ ; SI-LABEL: name: test_store_global_v3s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2658,12 +2647,12 @@ ; SI-LABEL: name: test_store_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, 
addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -4091,9 +4080,7 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4132,7 +4119,7 @@ ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4230,9 +4217,7 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4249,7 +4234,7 @@ ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) 
into unknown-address + 8, addrspace 1) @@ -4305,12 +4290,12 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4341,12 +4326,12 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4377,12 +4362,12 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) 
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4763,9 +4748,7 @@ ; SI-LABEL: name: test_store_global_v5s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4838,7 +4821,7 @@ ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4856,18 +4839,16 @@ ; CI-LABEL: name: test_store_global_v5s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; 
VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4940,7 +4921,7 @@ ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -4958,12 +4939,12 @@ ; GFX9-LABEL: name: test_store_global_v5s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1) @@ -4978,9 +4959,7 @@ ; SI-LABEL: name: test_store_global_v5s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5011,7 +4990,7 @@ ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; 
SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5019,18 +4998,16 @@ ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5061,7 +5038,7 @@ ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5069,12 +5046,12 @@ ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1) @@ -5089,39 +5066,39 @@ ; SI-LABEL: name: test_store_global_v5s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x 
s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1) @@ -5136,39 +5113,39 @@ ; SI-LABEL: name: test_store_global_v5s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1) @@ -5183,39 +5160,39 @@ ; SI-LABEL: name: test_store_global_v5s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, 
addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1) @@ -5230,9 +5207,7 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5305,7 +5280,7 @@ ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5324,19 +5299,17 @@ ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5409,7 +5382,7 @@ ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -5428,12 +5401,12 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, 
%0 :: (store (<5 x p3>), align 1, addrspace 1) @@ -5449,9 +5422,7 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5482,7 +5453,7 @@ ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5491,19 +5462,17 @@ ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x 
s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5534,7 +5503,7 @@ ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5543,12 +5512,12 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1) @@ -5564,42 +5533,42 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), 
[[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1) @@ -5615,42 +5584,42 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) 
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1) @@ -5666,42 +5635,42 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: 
[[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, 
addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1) @@ -5717,42 +5686,42 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<10 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1) @@ -5767,159 +5736,182 @@ ; SI-LABEL: name: test_store_global_v11s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), 
[[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; SI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C]](s64) - ; SI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; SI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x 
s16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; SI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; SI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; SI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v11s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV60:%[0-9]+]]:_(<2 
x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; CI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; CI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; 
CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; CI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; CI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; CI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; 
CI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v11s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), 
[[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; VI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST2]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; VI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; VI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; VI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
 ; GFX9-LABEL: name: test_store_global_v11s16_align4
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
- ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
- ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; GFX9-NEXT: G_STORE [[BITCAST6]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; GFX9-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
 ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; GFX9-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<11 x s16>) = G_IMPLICIT_DEF
 G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1)
@@ -5935,42 +5927,46 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
 ; CI-LABEL: name: test_store_global_v12s16_align4
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
 ; VI-LABEL: name: test_store_global_v12s16_align4
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
 ; GFX9-LABEL: name: test_store_global_v12s16_align4
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<12 x s16>) = G_IMPLICIT_DEF
 G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1)
@@ -5986,9 +5982,7 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6061,7 +6055,7 @@
 ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
 ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
 ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
 ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -6080,19 +6074,17 @@
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
 ; VI-LABEL: name: test_store_global_s160_align1
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6165,7 +6157,7 @@
 ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
 ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
 ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
 ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
@@ -6184,12 +6176,12 @@
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 G_STORE %1, %0 :: (store (s160), align 1, addrspace 1)
@@ -6205,9 +6197,7 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6238,7 +6228,7 @@
 ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
 ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
 ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
 ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -6247,19 +6237,17 @@
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
 ; VI-LABEL: name: test_store_global_s160_align2
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6290,7 +6278,7 @@
 ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
 ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
 ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
 ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -6299,12 +6287,12 @@
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 G_STORE %1, %0 :: (store (s160), align 2, addrspace 1)
@@ -6320,42 +6308,42 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
 ; CI-LABEL: name: test_store_global_s160_align4
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
 ; VI-LABEL: name: test_store_global_s160_align4
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
 ; GFX9-LABEL: name: test_store_global_s160_align4
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 G_STORE %1, %0 :: (store (s160), align 4, addrspace 1)
@@ -6371,42 +6359,42 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
 ; CI-LABEL: name: test_store_global_s160_align8
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
 ; VI-LABEL: name: test_store_global_s160_align8
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
 ; GFX9-LABEL: name: test_store_global_s160_align8
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 G_STORE %1, %0 :: (store (s160), align 8, addrspace 1)
@@ -6422,42 +6410,42 @@
 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
 ; CI-LABEL: name: test_store_global_s160_align16
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
 ; VI-LABEL: name: test_store_global_s160_align16
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
 ; GFX9-LABEL: name: test_store_global_s160_align16
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
 G_STORE %1, %0 :: (store (s160), align 16, addrspace 1)
@@ -7797,11 +7785,9 @@
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
 ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -7874,7 +7860,6 @@
 ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
 ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
 ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32)
 ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
@@ -7940,7 +7925,7 @@
 ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
 ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
 ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32)
 ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64)
 ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -7960,27 +7945,26 @@
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
 ; VI-LABEL: name: test_store_global_v9s32_align1
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
 ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -8053,7 +8037,6 @@
 ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
 ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
 ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32)
 ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
@@ -8119,7 +8102,7 @@
 ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
 ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
 ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32)
 ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64)
 ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
@@ -8139,17 +8122,18 @@
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -8169,11 +8153,9 @@
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
 ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -8204,7 +8186,6 @@
 ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
 ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
 ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32)
 ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
@@ -8230,7 +8211,7 @@
 ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
 ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
 ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32)
 ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
 ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
@@ -8240,27 +8221,26 @@
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
 ; VI-LABEL: name: test_store_global_v9s32_align2
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
 ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -8291,7 +8271,6 @@
 ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
 ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
 ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
 ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32)
 ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
@@ -8317,7 +8296,7 @@
 ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
 ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
 ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32)
 ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
 ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
@@ -8327,17 +8306,18 @@
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -8357,65 +8337,69 @@
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
 ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
 ; CI-LABEL: name: test_store_global_v9s32_align4
 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
 ; VI-LABEL: name: test_store_global_v9s32_align4
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
 ; GFX9-LABEL: name: test_store_global_v9s32_align4
 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT:
[[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8435,65 +8419,69 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, 
addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT 
[[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 
(<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8513,65 +8501,69 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT 
[[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) 
= G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -152,12 +152,12 @@ ; SI-LABEL: name: test_store_global_v3s32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) 
= G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
; VI-LABEL: name: test_store_global_v3s32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
@@ -473,12 +473,12 @@
; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
; VI-LABEL: name: test_store_global_96
; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
@@ -329,11 +329,9 @@
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
@@ -342,7 +340,7 @@
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]]
; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32)
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]],
[[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] @@ -354,27 +352,27 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -386,39 +384,37 @@ ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; 
GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -427,29 +423,27 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_umax_s32
@@ -331,12 +331,12 @@
; SI-LABEL: name: test_umax_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -355,7 +355,6 @@
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_umax_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -364,6 +363,7 @@
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -381,23 +381,27 @@
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_umax_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]]
- ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]]
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_umin_s32
@@ -331,12 +331,12 @@
; SI-LABEL: name: test_umin_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -355,7 +355,6 @@
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_umin_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -364,6 +363,7 @@
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -381,23 +381,27 @@
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_umin_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]]
- ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]]
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -443,67 +443,51 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C2]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; 
GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR1]](<2 x s16>) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL]] - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[DEF1]](s32) - ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -407,11 +407,9 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) @@ -421,7 +419,7 @@ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) @@ -436,26 +434,26 @@ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL 
[[AND8]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -467,13 +465,11 @@ ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) @@ -489,27 +485,27 @@ ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], 
[[C2]] ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -521,13 +517,11 @@ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) @@ -543,16 +537,16 @@ ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT 
[[SELECT1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST4]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -317,18 +317,16 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) @@ -338,27 +336,27 @@ ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -370,39 +368,37 @@ ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -411,29 +407,27 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; 
GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc 
-mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- | @@ -73,100 +73,88 @@ ; GFX8-LABEL: name: add_v3i16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; 
GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 ; GFX9-LABEL: name: add_v3i16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV4]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ADD]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %3:_(<2 x s16>) = COPY $vgpr0 %4:_(<2 x s16>) = COPY $vgpr1 - %5:_(<2 x s16>) = G_IMPLICIT_DEF - %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>) - %19:_(s96) = G_BITCAST %6(<6 x s16>) - %20:_(s48) = G_TRUNC %19(s96) - %0:_(<3 x s16>) = G_BITCAST %20(s48) - %8:_(<2 x s16>) = COPY $vgpr2 - %9:_(<2 x s16>) = COPY $vgpr3 - %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>) - %21:_(s96) = G_BITCAST %10(<6 x s16>) - %22:_(s48) = G_TRUNC %21(s96) - %1:_(<3 x s16>) = G_BITCAST %22(s48) + %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) + %10:_(<2 x s16>) = COPY $vgpr2 + %11:_(<2 x s16>) = COPY $vgpr3 + %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) + %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) %2:sgpr_64 = COPY $sgpr30_sgpr31 - %12:_(<3 x s16>) = G_ADD %0, %1 - %16:_(<3 x s16>) = G_IMPLICIT_DEF - %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>) - %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>) - $vgpr0 = COPY %14(<2 x s16>) - $vgpr1 = COPY %15(<2 x s16>) - %13:ccr_sgpr_64 = COPY %2 - S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1 + %17:_(<3 x s16>) = G_ADD %0, %1 + %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) + %24:_(s16) = G_IMPLICIT_DEF + %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) + %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + %18:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1 ... 
--- @@ -178,116 +166,88 @@ ; GFX8-LABEL: name: shl_v3i16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 
0 + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 ; GFX9-LABEL: name: shl_v3i16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV5]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV6]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; 
GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV8]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT1]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[COPY5]], [[COPY6]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; GFX9-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %3:_(<2 x s16>) = COPY $vgpr0 %4:_(<2 x s16>) = COPY $vgpr1 - %5:_(<2 x s16>) = G_IMPLICIT_DEF - %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>) - %19:_(s96) = G_BITCAST %6(<6 x s16>) - %20:_(s48) = G_TRUNC %19(s96) - %0:_(<3 x s16>) = G_BITCAST %20(s48) - %8:_(<2 x s16>) = COPY $vgpr2 - %9:_(<2 x s16>) = COPY $vgpr3 - %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>) - %21:_(s96) = G_BITCAST %10(<6 x s16>) - %22:_(s48) = G_TRUNC %21(s96) - %1:_(<3 x s16>) = G_BITCAST %22(s48) + %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), 
%8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) + %10:_(<2 x s16>) = COPY $vgpr2 + %11:_(<2 x s16>) = COPY $vgpr3 + %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) + %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) %2:sgpr_64 = COPY $sgpr30_sgpr31 - %12:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) - %16:_(<3 x s16>) = G_IMPLICIT_DEF - %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>) - %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>) - $vgpr0 = COPY %14(<2 x s16>) - $vgpr1 = COPY %15(<2 x s16>) - %13:ccr_sgpr_64 = COPY %2 - S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1 + %17:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) + %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) + %24:_(s16) = G_IMPLICIT_DEF + %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) + %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + %18:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1 ... --- @@ -389,31 +349,30 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX8-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] @@ -429,103 +388,90 @@ ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] ; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]] ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] - ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; 
GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>) ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-LABEL: name: maxnum_v5i16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS2]](<10 x s16>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV7]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV8]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV9]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; 
GFX9-NEXT: [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<10 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV12]](<5 x s16>), 0 - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; GFX9-NEXT: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV14]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV17]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV15]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV18]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC4]] ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV16]] - ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV19]] + ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC5]] ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) - ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST8]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC8]](<2 x s16>) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %2:_(<2 x s16>) = COPY $vgpr0 %3:_(<2 x s16>) = COPY $vgpr1 %4:_(<2 x s16>) = COPY $vgpr2 - %5:_(<2 x s16>) = G_IMPLICIT_DEF - %6:_(<10 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>) - %22:_(s160) = G_BITCAST %6(<10 x s16>) - %23:_(s80) = G_TRUNC %22(s160) - %0:_(<5 x s16>) = G_BITCAST %23(s80) - %8:_(<2 x s16>) = COPY $vgpr3 - %9:_(<2 x s16>) = COPY $vgpr4 - %10:_(<2 x s16>) = COPY $vgpr5 - %11:_(<10 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>) - %24:_(s160) = G_BITCAST %11(<10 x s16>) - %25:_(s80) = G_TRUNC %24(s160) - %1:_(<5 x s16>) = G_BITCAST %25(s80) - %14:_(<5 x s16>) = G_FMAXNUM %0, %1 - %18:_(<5 x s16>) = G_IMPLICIT_DEF - %19:_(<10 x s16>) = G_CONCAT_VECTORS %14(<5 x s16>), %18(<5 x s16>) - %15:_(<2 x s16>), %16:_(<2 x s16>), %17:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %19(<10 x s16>) - $vgpr0 = COPY %15(<2 x s16>) - $vgpr1 = COPY %16(<2 x s16>) - $vgpr2 = COPY %17(<2 x s16>) + %5:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16) = G_UNMERGE_VALUES %5(<6 x s16>) + %0:_(<5 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16), %9(s16), %10(s16) + %12:_(<2 x s16>) = COPY $vgpr3 + %13:_(<2 x s16>) = COPY $vgpr4 + %14:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %12(<2 x s16>), %13(<2 x s16>), %14(<2 x s16>) + %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16), %20:_(s16), %21:_(s16) = G_UNMERGE_VALUES %15(<6 x s16>) + %1:_(<5 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16), %20(s16) + %23:_(<5 x s16>) = G_FMAXNUM %0, %1 + %27:_(s16), %28:_(s16), %29:_(s16), %30:_(s16), %31:_(s16) = G_UNMERGE_VALUES %23(<5 x s16>) + %32:_(s16) = G_IMPLICIT_DEF + %33:_(<6 x s16>) = G_BUILD_VECTOR %27(s16), %28(s16), %29(s16), %30(s16), %31(s16), %32(s16) + %24:_(<2 x s16>), %25:_(<2 x s16>), %26:_(<2 x s16>) = G_UNMERGE_VALUES %33(<6 x s16>) + $vgpr0 = COPY %24(<2 x s16>) + $vgpr1 = COPY %25(<2 x s16>) + $vgpr2 = COPY %26(<2 x s16>) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_xor_s32 @@ -305,18 +305,14 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[XOR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_XOR %0, %1 @@ -353,23 +349,20 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_XOR %0, %1 @@ -424,40 +417,65 @@ ; CHECK-LABEL: name: test_xor_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x 
s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -492,54 +510,102 @@ ; CHECK-LABEL: name: test_xor_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) 
= G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]] - ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND 
[[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR 
[[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_XOR %0, %1 @@ -554,34 +620,15 @@ bb.0: ; CHECK-LABEL: name: test_xor_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 
0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_XOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -142,10 +142,7 @@ ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x 
s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -707,12 +704,6 @@ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32) ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) @@ -737,28 +728,16 @@ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]] ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C3]] - ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]] - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]] - ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C3]] - ; CHECK-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C3]] - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]] + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32) - ; CHECK-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; CHECK-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C3]] - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]] - ; CHECK-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C3]] - ; CHECK-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]] - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32) - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]] - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32) - ; CHECK-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C3]] - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]] - ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR11]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR12]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]] + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32) ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384) ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll @@ -509,13 +509,10 @@ ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff -; GFX8-UNPACKED-NEXT: s_and_b32 s1, s0, s0 -; GFX8-UNPACKED-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, s0, v1 -; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v1 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v2 +; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-UNPACKED-NEXT: ; return to shader part epilog ; @@ -530,12 +527,11 @@ ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 -; GFX8-PACKED-NEXT: s_mov_b32 s0, 0xffff -; GFX8-PACKED-NEXT: s_and_b32 s0, s0, s0 -; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16 -; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-PACKED-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: load_1d_v3f16_xyz: @@ -552,7 +548,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0 +; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: load_1d_v3f16_xyz: @@ -565,11 +564,15 @@ ; GFX10-NEXT: s_mov_b32 s5, s7 ; GFX10-NEXT: s_mov_b32 s6, s8 ; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2 ; GFX10-NEXT: ; return to shader part epilog %v = call <3 x half> 
@llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -9,46 +9,49 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val } @@ -56,46 +59,49 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_i32_glc ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_glc ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_glc ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1) ret i32 %val } @@ -103,61 +109,64 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v2i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX7-LABEL: name: s_buffer_load_v2i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX8-LABEL: name: s_buffer_load_v2i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val } @@ -165,85 +174,79 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v3i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG 
implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; 
GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val } @@ -251,133 +254,136 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v8i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, 
implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX7-LABEL: name: s_buffer_load_v8i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = 
S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX8-LABEL: name: s_buffer_load_v8i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX8-NEXT: 
[[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val } @@ -385,229 +391,232 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v16i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX6: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX6: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX6: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX6: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX6: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX6: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX6: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX6: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX6: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX6: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX6: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX6: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX6: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX6: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX6: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX6: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX6: 
[[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX6: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX6: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX6: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX6: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX6: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX6: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX6: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; 
GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX7-LABEL: name: s_buffer_load_v16i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX7: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX7: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX7: $sgpr8 = COPY 
[[V_READFIRSTLANE_B32_8]] - ; GFX7: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX7: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX7: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX7: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX7: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX7: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX7: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX7: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX7: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX7: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX7: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX7: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX7: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX7: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX7: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX7: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX7: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX7: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX7: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX7: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX7: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX7-NEXT: 
[[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX8-LABEL: name: s_buffer_load_v16i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX8: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX8: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX8: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX8: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX8: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX8: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX8: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX8: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX8: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX8: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX8: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX8: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX8: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX8: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX8: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX8: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX8: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX8: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX8: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX8: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX8: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX8: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX8: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX8: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX8-NEXT: 
[[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val } @@ -615,45 +624,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0) ret i32 %val } @@ -661,43 +673,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1) ret i32 %val } @@ -705,45 +720,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_255 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0) ret i32 %val } @@ -751,43 +769,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_256 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0) ret i32 %val } @@ -795,43 +816,46 @@ define amdgpu_ps i32 
@s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0) ret i32 %val } @@ -839,45 +863,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; 
GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0) ret i32 %val } @@ -885,44 +912,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable 
invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0) ret i32 %val } @@ -930,45 +960,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: 
SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0) ret i32 %val } @@ -976,46 +1009,49 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX6-NEXT: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) ret i32 %load } @@ -1023,45 +1059,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci 
[[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) ret i32 %load } @@ -1069,45 +1108,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit 
$exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) ret i32 %load } @@ -1115,45 +1157,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; 
GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) ret i32 %load } @@ -1161,45 +1206,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) ret i32 %load } @@ -1207,45 +1255,48 @@ 
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 
134217728, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) ret i32 %load } @@ -1253,45 +1304,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) ret i32 %load } @@ -1299,45 +1353,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY 
$sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = 
COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) ret i32 %load } @@ -1345,45 +1402,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) ret i32 %load } @@ -1391,44 +1451,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = 
S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
 ret i32 %load
 }
@@ -1436,45 +1499,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
 ret i32 %load
 }
@@ -1483,43 +1549,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret float %val
 }
@@ -1527,52 +1596,55 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret <2 x float> %val
 }
@@ -1580,73 +1652,64 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
- ; GFX6: $vgpr0 = COPY [[COPY8]]
- ; GFX6: $vgpr1 = COPY [[COPY9]]
- ; GFX6: $vgpr2 = COPY [[COPY10]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
- ; GFX7: $vgpr0 = COPY [[COPY8]]
- ; GFX7: $vgpr1 = COPY [[COPY9]]
- ; GFX7: $vgpr2 = COPY [[COPY10]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
- ; GFX8: $vgpr0 = COPY [[COPY8]]
- ; GFX8: $vgpr1 = COPY [[COPY9]]
- ; GFX8: $vgpr2 = COPY [[COPY10]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret <3 x float> %val
 }
@@ -1654,64 +1717,67 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret <4 x float> %val
 }
@@ -1719,94 +1785,97 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret <8 x float> %val
 }
@@ -1814,148 +1883,151 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: $vgpr8 = COPY [[COPY13]]
- ; GFX6: $vgpr9 = COPY [[COPY14]]
- ; GFX6: $vgpr10 = COPY [[COPY15]]
- ; GFX6: $vgpr11 = COPY [[COPY16]]
- ; GFX6: $vgpr12 = COPY [[COPY17]]
- ; GFX6: $vgpr13 = COPY [[COPY18]]
- ; GFX6: $vgpr14 = COPY [[COPY19]]
- ; GFX6: $vgpr15 = COPY [[COPY20]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: $vgpr8 = COPY [[COPY13]]
- ; GFX7: $vgpr9 = COPY [[COPY14]]
- ; GFX7: $vgpr10 = COPY [[COPY15]]
- ; GFX7: $vgpr11 = COPY [[COPY16]]
- ; GFX7: $vgpr12 = COPY [[COPY17]]
- ; GFX7: $vgpr13 = COPY [[COPY18]]
- ; GFX7: $vgpr14 = COPY [[COPY19]]
- ; GFX7: $vgpr15 = COPY [[COPY20]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: $vgpr8 = COPY [[COPY13]]
- ; GFX8: $vgpr9 = COPY [[COPY14]]
- ; GFX8: $vgpr10 = COPY [[COPY15]]
- ; GFX8: $vgpr11 = COPY [[COPY16]]
- ; GFX8: $vgpr12 = COPY [[COPY17]]
- ; GFX8: $vgpr13 = COPY [[COPY18]]
- ; GFX8: $vgpr14 = COPY [[COPY19]]
- ; GFX8: $vgpr15 = COPY [[COPY20]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load
(s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val } @@ -1963,43 +2035,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2008,43 +2083,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; 
GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2053,43 +2131,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2099,94 +2180,97 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], 
%subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 
4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2196,94 +2280,97 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit 
$vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 
= COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2292,148 +2379,151 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: $vgpr8 = COPY [[COPY13]]
- ; GFX6: $vgpr9 = COPY [[COPY14]]
- ; GFX6: $vgpr10 = COPY [[COPY15]]
- ; GFX6: $vgpr11 = COPY [[COPY16]]
- ; GFX6: $vgpr12 = COPY [[COPY17]]
- ; GFX6: $vgpr13 = COPY [[COPY18]]
- ; GFX6: $vgpr14 = COPY [[COPY19]]
- ; GFX6: $vgpr15 = COPY [[COPY20]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: $vgpr8 = COPY [[COPY13]]
- ; GFX7: $vgpr9 = COPY [[COPY14]]
- ; GFX7: $vgpr10 = COPY [[COPY15]]
- ; GFX7: $vgpr11 = COPY [[COPY16]]
- ; GFX7: $vgpr12 = COPY [[COPY17]]
- ; GFX7: $vgpr13 = COPY [[COPY18]]
- ; GFX7: $vgpr14 = COPY [[COPY19]]
- ; GFX7: $vgpr15 = COPY [[COPY20]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: $vgpr8 = COPY [[COPY13]]
- ; GFX8: $vgpr9 = COPY [[COPY14]]
- ; GFX8: $vgpr10 = COPY [[COPY15]]
- ; GFX8: $vgpr11 = COPY [[COPY16]]
- ; GFX8: $vgpr12 = COPY [[COPY17]]
- ; GFX8: $vgpr13 = COPY [[COPY18]]
- ; GFX8: $vgpr14 = COPY [[COPY19]]
- ; GFX8: $vgpr15 = COPY [[COPY20]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 %soffset = add i32 %soffset.base, 4032
 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret <16 x float> %val
@@ -2442,148 +2532,151 @@
 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: $vgpr8 = COPY [[COPY13]]
- ; GFX6: $vgpr9 = COPY [[COPY14]]
- ; GFX6: $vgpr10 = COPY [[COPY15]]
- ; GFX6: $vgpr11 = COPY [[COPY16]]
- ; GFX6: $vgpr12 = COPY [[COPY17]]
- ; GFX6: $vgpr13 = COPY [[COPY18]]
- ; GFX6: $vgpr14 = COPY [[COPY19]]
- ; GFX6: $vgpr15 = COPY [[COPY20]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: $vgpr8 = COPY [[COPY13]]
- ; GFX7: $vgpr9 = COPY [[COPY14]]
- ; GFX7: $vgpr10 = COPY [[COPY15]]
- ; GFX7: $vgpr11 = COPY [[COPY16]]
- ; GFX7: $vgpr12 = COPY [[COPY17]]
- ; GFX7: $vgpr13 = COPY [[COPY18]]
- ; GFX7: $vgpr14 = COPY [[COPY19]]
- ; GFX7: $vgpr15 = COPY [[COPY20]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: $vgpr8 = COPY [[COPY13]]
- ; GFX8: $vgpr9 = COPY [[COPY14]]
- ; GFX8: $vgpr10 = COPY [[COPY15]]
- ; GFX8: $vgpr11 = COPY [[COPY16]]
- ; GFX8: $vgpr12 = COPY [[COPY17]]
- ; GFX8: $vgpr13 = COPY [[COPY18]]
- ; GFX8: $vgpr14 = COPY [[COPY19]]
- ; GFX8: $vgpr15 = COPY [[COPY20]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
 %soffset = add i32 %soffset.base, 4036
 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret <16 x float> %val
@@ -2593,109 +2686,124 @@
 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
 ret float %val
 }
@@ -2704,103 +2812,118 @@
 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2,
[[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2810,115 +2933,130 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], 
implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, 
$vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2928,103 +3066,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX6: bb.1 
(%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: 
successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit 
$exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val } @@ -3033,107 +3186,122 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + 
; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret 
float %val } @@ -3143,154 +3311,169 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: 
SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; 
GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3301,166 +3484,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: $vgpr3 = COPY [[COPY11]] - ; GFX6: $vgpr4 = COPY [[COPY12]] - ; GFX6: $vgpr5 = COPY [[COPY13]] - ; GFX6: $vgpr6 = COPY [[COPY14]] - ; GFX6: $vgpr7 = COPY [[COPY15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; 
GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: $vgpr3 = COPY [[COPY11]] - ; GFX7: $vgpr4 = COPY [[COPY12]] - ; GFX7: $vgpr5 = COPY [[COPY13]] - ; GFX7: $vgpr6 = COPY [[COPY14]] - ; GFX7: $vgpr7 = COPY [[COPY15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: $vgpr3 = COPY [[COPY11]] - ; GFX8: $vgpr4 = COPY [[COPY12]] - ; GFX8: $vgpr5 = COPY [[COPY13]] - ; GFX8: $vgpr6 = COPY [[COPY14]] - ; GFX8: $vgpr7 = COPY [[COPY15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; 
GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3469,166 +3667,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; 
GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: $vgpr3 = COPY [[COPY11]] - ; GFX6: $vgpr4 = COPY [[COPY12]] - ; GFX6: $vgpr5 = COPY [[COPY13]] - ; GFX6: $vgpr6 = COPY [[COPY14]] - ; GFX6: $vgpr7 = COPY [[COPY15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: $vgpr3 = COPY [[COPY11]] - ; GFX7: $vgpr4 = COPY [[COPY12]] - ; GFX7: $vgpr5 = COPY [[COPY13]] - ; GFX7: $vgpr6 = COPY [[COPY14]] - ; GFX7: $vgpr7 = COPY [[COPY15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; 
GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: $vgpr3 = COPY [[COPY11]] - ; GFX8: $vgpr4 = COPY [[COPY12]] - ; GFX8: $vgpr5 = COPY [[COPY13]] - ; GFX8: $vgpr6 = COPY [[COPY14]] - ; GFX8: $vgpr7 = COPY [[COPY15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3637,157 +3850,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, 
implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, 
$vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = 
S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3796,157 +4024,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 
(s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, 
implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; 
GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3955,157 +4198,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, 
implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: 
+ ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; 
GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -4114,154 +4372,169 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY6]] - ; GFX6: $vgpr1 = COPY [[COPY7]] - ; GFX6: $vgpr2 = COPY [[COPY8]] - ; GFX6: $vgpr3 = COPY [[COPY9]] - ; GFX6: $vgpr4 = COPY [[COPY10]] - ; GFX6: $vgpr5 = COPY [[COPY11]] - ; GFX6: $vgpr6 = COPY [[COPY12]] - ; GFX6: $vgpr7 = COPY [[COPY13]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 
= REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY6]]
- ; GFX7: $vgpr1 = COPY [[COPY7]]
- ; GFX7: $vgpr2 = COPY [[COPY8]]
- ; GFX7: $vgpr3 = COPY [[COPY9]]
- ; GFX7: $vgpr4 = COPY [[COPY10]]
- ; GFX7: $vgpr5 = COPY [[COPY11]]
- ; GFX7: $vgpr6 = COPY [[COPY12]]
- ; GFX7: $vgpr7 = COPY [[COPY13]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY6]]
- ; GFX8: $vgpr1 = COPY [[COPY7]]
- ; GFX8: $vgpr2 = COPY [[COPY8]]
- ; GFX8: $vgpr3 = COPY [[COPY9]]
- ; GFX8: $vgpr4 = COPY [[COPY10]]
- ; GFX8: $vgpr5 = COPY [[COPY11]]
- ; GFX8: $vgpr6 = COPY [[COPY12]]
- ; GFX8: $vgpr7 = COPY [[COPY13]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
 ret <8 x float> %val
 }
@@ -4269,43 +4542,46 @@
 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %offset = add i32 %offset.v, %offset.s
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
 ret float %val
@@ -4314,43 +4590,46 @@
 define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %offset = add i32 %offset.s, %offset.v
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
 ret float %val
@@ -4359,52 +4638,55 @@
 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %offset.base = add i32 %offset.v, %offset.s
 %offset = add i32 %offset.base, 1024
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
@@ -4414,52 +4696,55 @@
 define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %offset.base = add i32 %offset.s, %offset.v
 %offset = add i32 %offset.base, 1024
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
@@ -4470,49 +4755,52 @@
 define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %offset.base = add i32 %offset.s, 1024
 %offset = add i32 %offset.base, %offset.v
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
@@ -4522,52 +4810,55 @@
 define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
 ; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
 ; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX7-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
 ; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %offset.base = add i32 %offset.v, 1024
 %offset = add i32 %offset.base, %offset.s
 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
@@ -530,20 +530,20 @@
 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
 ; GFX9-LABEL: s_load_constant_v3i32_align4:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX7-LABEL: s_load_constant_v3i32_align4:
 ; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
 ; GFX7-NEXT: ; return to shader part epilog
 %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
 ret <3 x i32> %load
@@ -552,20 +552,20 @@
 define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) {
 ; GFX9-LABEL: s_load_constant_i96_align8:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX7-LABEL: s_load_constant_i96_align8:
 ; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
 ; GFX7-NEXT: ; return to shader part epilog
 %load = load i96, i96 addrspace(4)* %ptr, align 8
 ret i96 %load
@@ -574,20 +574,20 @@
 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) {
 ; GFX9-LABEL: s_load_constant_v3i32_align8:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX7-LABEL: s_load_constant_v3i32_align8:
 ; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
 ; GFX7-NEXT: ; return to shader part epilog
 %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8
 ret <3 x i32> %load
@@ -596,20 +596,20 @@
 define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) {
 ; GFX9-LABEL: s_load_constant_v6i16_align8:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX7-LABEL: s_load_constant_v6i16_align8:
 ; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
 ; GFX7-NEXT: ; return to shader part epilog
 %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8
 %cast = bitcast <6 x i16> %load to <3 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
@@ -93,12 +93,7 @@
 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
 ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
 ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
@@ -120,12 +115,7 @@
 ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
 ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
 ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32)
@@ -435,14 +425,7 @@
 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -460,14 +443,7 @@
 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>)
- ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
 ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -680,10 +680,9 @@
 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 %0:_(p4) = COPY $sgpr0_sgpr1
 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4)
 S_ENDPGM 0, implicit %1
@@ -702,10 +701,9 @@
 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 %0:_(p4) = COPY $sgpr0_sgpr1
 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8)
 S_ENDPGM 0, implicit %1
@@ -721,8 +719,9 @@
 ; CHECK-LABEL: name: load_constant_v3i32_align16
 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 %0:_(p4) = COPY $sgpr0_sgpr1
 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16)
 S_ENDPGM 0, implicit %1
@@ -741,10 +740,10 @@
 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
 %0:_(p4) = COPY $sgpr0_sgpr1
 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4)
 S_ENDPGM 0, implicit %1
@@ -763,10 +762,10 @@
 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -782,8 +781,9 @@ ; CHECK-LABEL: name: load_constant_v6i16_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16), [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16), [[UV6:%[0-9]+]]:sgpr(s16), [[UV7:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -802,10 +802,9 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -824,10 +823,9 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -843,8 +841,8 @@ ; CHECK-LABEL: name: load_constant_i96_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s96) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[LOAD]](s128) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16) S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -34,10 +34,9 @@ ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) - ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !0) $sgpr0_sgpr1_sgpr2 = COPY %1 @@ -57,10 +56,9 @@ ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) - ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1) $sgpr0_sgpr1_sgpr2 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1051,10 +1051,11 @@ ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s4, 0xf000 -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s5, 0xf000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1066,10 +1067,10 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1098,10 +1099,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1129,55 +1129,56 @@ ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; 
CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: v_mov_b32_e32 v8, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: v_mov_b32_e32 v7, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc @@ -1490,41 +1491,169 @@ ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_movk_i32 s6, 0xf000 -; CGP-NEXT: s_movk_i32 s7, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_movk_i32 s6, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s7, 0xf000 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; 
CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v11, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, 
v11, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1535,27 +1664,27 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; 
CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1566,195 +1695,69 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v11, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v12, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; 
CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, <i64 4096, i64 4096> ret <2 x i64> %result @@ -1764,10 +1767,11 @@ ; 
CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1779,10 +1783,10 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1811,10 +1815,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1842,55 +1845,56 @@ ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 ; CHECK-NEXT: 
v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: v_mov_b32_e32 v8, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: v_mov_b32_e32 v7, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc @@ -2203,41 +2207,169 @@ ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s7, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_mov_b32 s6, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, 
vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: 
v_mov_b32_e32 v11, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2248,27 +2380,27 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, 
vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2279,195 +2411,69 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v11, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v12, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, 
v7, v0, vcc -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, 
vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 
s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1031,10 +1031,11 @@ ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s4, 0xf000 -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s5, 0xf000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1046,10 +1047,10 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1078,10 +1079,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1109,49 +1109,50 @@ ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc @@ -1466,41 +1467,166 @@ ; CGP-LABEL: v_srem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_movk_i32 s6, 0xf000 -; CGP-NEXT: s_movk_i32 s7, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_movk_i32 s6, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s7, 0xf000 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: 
v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v8, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 +; 
CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1511,27 +1637,27 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, 
v11, v12 @@ -1542,191 +1668,68 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 -; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; 
CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, 
v4 -; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_mov_b32_e32 v8, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v9, s4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, <i64 4096, i64 4096> ret <2 x i64> %result @@ -1736,10 +1739,11 @@ ; CHECK-LABEL: v_srem_i64_oddk_denom ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT:
v_ashrrev_i32_e32 v3, 31, v1 @@ -1751,10 +1755,10 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1783,10 +1787,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1814,49 +1817,50 @@ ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; 
CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc @@ -2171,41 +2175,166 @@ ; CGP-LABEL: v_srem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s7, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_mov_b32 s6, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, 
v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v8, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v6, 
0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2216,27 +2345,27 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2247,191 +2376,68 @@ ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 
v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 -; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; 
CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: 
v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_mov_b32_e32 v8, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v9, s4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, <i64 1235195, i64 1235195> ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -968,129 +968,130 @@ ; CHECK-LABEL: v_urem_i64_oddk_denom ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000 +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CHECK-NEXT: v_mov_b32_e32 v3, s5 -; CHECK-NEXT: v_mov_b32_e32 v4, s7 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v5, v5 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 -; CHECK-NEXT: v_add_i32_e32
v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6 -; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s4 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 +; CHECK-NEXT: v_mov_b32_e32 v6, s7 +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, s5, v3 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_mul_lo_u32 v10, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v4, v7 +; CHECK-NEXT: v_mul_hi_u32 v13, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v13 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v11 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v8, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v9, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v10, s5, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v4, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v13, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s4, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v6 -; CHECK-NEXT: 
v_mul_hi_u32 v11, v2, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v7 -; CHECK-NEXT: v_mul_lo_u32 v9, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 
v2, v3, v2, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i64 %num, 1235195 ret i64 %result @@ -1357,63 +1358,64 @@ ; CGP-LABEL: v_urem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: s_mov_b32 s8, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s8, 0x12d8fb ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: s_bfe_i32 s5, -1, 0x10000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; CGP-NEXT: s_bfe_i32 s7, -1, 0x10000 ; CGP-NEXT: s_bfe_i32 s9, -1, 0x10000 -; CGP-NEXT: v_mov_b32_e32 v6, v4 -; CGP-NEXT: v_mov_b32_e32 v7, s4 -; CGP-NEXT: v_mov_b32_e32 v8, s5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_mov_b32_e32 v9, s7 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s8 +; CGP-NEXT: v_mov_b32_e32 v8, s4 +; CGP-NEXT: v_mov_b32_e32 v9, s5 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v10 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; 
CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 ; CGP-NEXT: v_trunc_f32_e32 v10, v10 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v10 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 ; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 -; CGP-NEXT: v_mul_lo_u32 v13, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v14, -1, v4 -; CGP-NEXT: v_mul_hi_u32 v15, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v16, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v17, -1, v5 -; CGP-NEXT: v_mul_hi_u32 v18, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v17, -1, v6 +; CGP-NEXT: v_mul_hi_u32 v18, s6, v6 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; CGP-NEXT: v_mul_lo_u32 v14, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v19, v4, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v16 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v16 +; CGP-NEXT: v_mul_hi_u32 v15, v6, v16 ; CGP-NEXT: v_mul_hi_u32 v16, v10, v16 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_mul_lo_u32 v18, v5, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v6, v12 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_mul_lo_u32 v15, v4, v11 -; CGP-NEXT: v_mul_lo_u32 v17, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v19 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v12 @@ -1424,7 +1426,7 @@ ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v17, v14 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v19, v16 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc @@ -1439,38 +1441,38 @@ ; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v13, -1, v4 -; CGP-NEXT: v_mul_hi_u32 v14, s6, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v11, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16 ; 
CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v12, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v15, -1, v5 -; CGP-NEXT: v_mul_hi_u32 v16, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v17, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v18, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v15, -1, v6 +; CGP-NEXT: v_mul_hi_u32 v16, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v17, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; CGP-NEXT: v_mul_lo_u32 v17, s6, v10 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v12 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v14, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_mul_lo_u32 v16, v5, v15 +; CGP-NEXT: v_mul_lo_u32 v16, v6, v15 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_lo_u32 v14, v4, v13 -; CGP-NEXT: v_mul_lo_u32 v16, v6, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v16, v7, v13 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v13 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v15 @@ -1481,125 +1483,126 @@ ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v17, v5, v15 +; CGP-NEXT: v_mul_hi_u32 v17, v6, v15 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; CGP-NEXT: v_mov_b32_e32 v19, s9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 +; CGP-NEXT: v_mov_b32_e32 v19, s7 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; CGP-NEXT: v_mov_b32_e32 v18, s9 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v16 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v3, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v2, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v1, v5 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v2, v6 -; CGP-NEXT: v_mul_lo_u32 v16, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v17, v2, v6 +; CGP-NEXT: v_add_i32_e32 v6, 
vcc, v6, v12 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v12, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v18, v0, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v10 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v1, v10 -; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v16, v4 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_mul_lo_u32 v11, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v10 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v17 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; CGP-NEXT: v_mul_hi_u32 v11, v2, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v3, v10 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v6 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v13, s8, v4 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s8, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 ; CGP-NEXT: v_mul_lo_u32 v14, s8, v5 -; CGP-NEXT: v_mul_lo_u32 v16, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v15, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, s8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_mul_lo_u32 v6, s8, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_mul_lo_u32 v12, s8, v6 +; CGP-NEXT: v_mul_lo_u32 v16, 0, v6 +; CGP-NEXT: v_mul_hi_u32 v6, s8, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v7, s8, v7 ; CGP-NEXT: v_mul_lo_u32 v10, s8, v10 -; CGP-NEXT: 
v_add_i32_e32 v6, vcc, v15, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v5, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 -; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] -; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[6:7] -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12 +; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v3, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2 -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0 +; CGP-NEXT: v_cndmask_b32_e32 v6, v19, v6, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7 -; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v2, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; CGP-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11 -; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4 +; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, v11, v12, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5] 
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i64> %num, <i64 1235195, i64 1235195> ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll @@ -428,30 +428,31 @@ ; ; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX7: ; %bb.0: ; %main_body -; G_GFX7-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd +; G_GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd ; G_GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xf +; G_GFX7-NEXT: s_load_dword s0, s[0:1], 0xf ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX7-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX7-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX7-NEXT: s_waitcnt vmcnt(0) ; G_GFX7-NEXT: ds_write_b32 v1, v0 ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX10: ; %bb.0: ; %main_body -; G_GFX10-NEXT: s_clause 0x2 -; G_GFX10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x3c ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX10-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX10-NEXT: s_load_dword s0, s[0:1], 0x3c ; G_GFX10-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX10-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX10-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX10-NEXT: s_waitcnt vmcnt(0) ; G_GFX10-NEXT: ds_write_b32 v1, v0 ; G_GFX10-NEXT: s_endpgm @@ -459,14 +460,14 @@ ; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX1030: ; %bb.0: ; %main_body ; G_GFX1030-NEXT: s_clause 0x2 -; G_GFX1030-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; G_GFX1030-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; G_GFX1030-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; G_GFX1030-NEXT: s_load_dword s2, s[0:1], 0x3c +; G_GFX1030-NEXT: s_load_dword s0, s[0:1], 0x3c ; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX1030-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX1030-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX1030-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX1030-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX1030-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX1030-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX1030-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX1030-NEXT: s_waitcnt vmcnt(0) ; G_GFX1030-NEXT: ds_write_b32 v1, v0 ; G_GFX1030-NEXT:
s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll --- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll @@ -244,25 +244,25 @@ ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f32 v1, v2, v1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v1, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen ; G_GFX9-NEXT: s_endpgm @@ -533,25 +533,25 @@ ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f32 v1, v2, v1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v1, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen ; G_GFX9-NEXT: s_endpgm @@ -800,23 +800,23 @@ ; G_GFX7: ; %bb.0: ; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb +; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb ; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; G_GFX7-NEXT: s_mov_b32 s10, -1 ; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000 ; G_GFX7-NEXT: s_add_u32 s8, s8, s3 +; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX7-NEXT: s_mov_b32 s4, 0 +; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: 
s_add_i32 s2, s2, 4 -; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX7-NEXT: s_add_i32 s4, s4, 4 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 ; G_GFX7-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -834,23 +834,23 @@ ; G_VI: ; %bb.0: ; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; G_VI-NEXT: s_mov_b32 s90, -1 ; G_VI-NEXT: s_mov_b32 s91, 0xe80000 ; G_VI-NEXT: s_add_u32 s88, s88, s3 +; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 -; G_VI-NEXT: s_mov_b32 s4, 0 +; G_VI-NEXT: s_mov_b32 s3, 0x40450000 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) -; G_VI-NEXT: s_add_i32 s2, s2, 4 -; G_VI-NEXT: s_mov_b32 s5, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s4 -; G_VI-NEXT: s_lshl_b32 s3, s2, 3 -; G_VI-NEXT: v_mov_b32_e32 v1, s5 -; G_VI-NEXT: v_mov_b32_e32 v2, s3 +; G_VI-NEXT: s_add_i32 s4, s4, 4 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_VI-NEXT: s_lshl_b32 s2, s2, 4 +; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 ; G_VI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -868,28 +868,28 @@ ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; G_GFX9-NEXT: s_mov_b32 s0, 0 +; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v2, s1 +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 -; G_GFX9-NEXT: v_mov_b32_e32 v4, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v5, v[0:1] ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3] -; G_GFX9-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -897,31 +897,31 @@ ; ; G_GFX10-LABEL: lds_ds_fmin_f64: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_clause 0x1 -; G_GFX10-NEXT: s_load_dword 
s2, s[0:1], 0x2c -; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; G_GFX10-NEXT: s_mov_b32 s10, -1 ; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 ; G_GFX10-NEXT: s_add_u32 s8, s8, s3 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX10-NEXT: s_mov_b32 s0, 0 ; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX10-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_add_i32 s2, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v5, s7 -; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX10-NEXT: s_add_i32 s4, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v5, s3 +; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3] -; G_GFX10-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -1143,23 +1143,23 @@ ; G_GFX7: ; %bb.0: ; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb +; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb ; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; G_GFX7-NEXT: s_mov_b32 s10, -1 ; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000 ; G_GFX7-NEXT: s_add_u32 s8, s8, s3 +; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX7-NEXT: s_mov_b32 s4, 0 +; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: s_add_i32 s2, s2, 4 -; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX7-NEXT: s_add_i32 s4, s4, 4 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 ; G_GFX7-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -1177,23 +1177,23 @@ ; G_VI: ; %bb.0: ; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; G_VI-NEXT: s_mov_b32 s90, -1 ; G_VI-NEXT: s_mov_b32 s91, 0xe80000 ; G_VI-NEXT: s_add_u32 s88, s88, s3 +; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 -; G_VI-NEXT: s_mov_b32 s4, 0 +; G_VI-NEXT: s_mov_b32 s3, 0x40450000 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) -; G_VI-NEXT: s_add_i32 s2, s2, 4 -; G_VI-NEXT: s_mov_b32 s5, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s4 -; G_VI-NEXT: s_lshl_b32 s3, s2, 3 -; G_VI-NEXT: v_mov_b32_e32 v1, s5 -; G_VI-NEXT: v_mov_b32_e32 v2, s3 +; G_VI-NEXT: s_add_i32 s4, 
s4, 4 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_VI-NEXT: s_lshl_b32 s2, s2, 4 +; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 ; G_VI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -1211,28 +1211,28 @@ ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; G_GFX9-NEXT: s_mov_b32 s0, 0 +; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v2, s1 +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 -; G_GFX9-NEXT: v_mov_b32_e32 v4, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v5, v[0:1] ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3] -; G_GFX9-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -1240,31 +1240,31 @@ ; ; G_GFX10-LABEL: lds_ds_fmax_f64: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_clause 0x1 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; G_GFX10-NEXT: s_mov_b32 s10, -1 ; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 ; G_GFX10-NEXT: s_add_u32 s8, s8, s3 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX10-NEXT: s_mov_b32 s0, 0 ; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX10-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_add_i32 s2, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v5, s7 -; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX10-NEXT: s_add_i32 s4, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v5, s3 +; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3] -; G_GFX10-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX10-NEXT: buffer_store_dword v1, 
v2, s[8:11], 0 offen offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -13,15 +13,12 @@ ; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]] ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4) - ; CHECK-NEXT: undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY killed %16 - ; CHECK-NEXT: [[COPY2]].sub2:sgpr_96_with_sub0_sub1 = COPY undef [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0 - ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1 - ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2 %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 ret <3 x i32> %load diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -1370,26 +1370,17 @@ CHECK: [[IMP_DEF0:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF CHECK: [[IMP_DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF CHECK: [[VALUE0:%[0-9]+]]:_(s32), [[VALUE1:%[0-9]+]]:_(s32), [[VALUE2:%[0-9]+]]:_(s32), [[VALUE3:%[0-9]+]]:_(s32), [[VALUE4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF0]]:_(<5 x s32>) - CHECK: [[IMP_DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF CHECK: [[VECTOR0:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE0]]:_(s32), [[VALUE1]]:_(s32) CHECK: [[VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE2]]:_(s32), [[VALUE3]]:_(s32) - CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE4]]:_(s32), [[IMP_DEF2]]:_(s32) - CHECK: [[IMP_DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF CHECK: [[VALUE5:%[0-9]+]]:_(s32), [[VALUE6:%[0-9]+]]:_(s32), [[VALUE7:%[0-9]+]]:_(s32), [[VALUE8:%[0-9]+]]:_(s32), [[VALUE9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF1]]:_(<5 x s32>) - CHECK: [[IMP_DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32) - CHECK: [[VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32) - CHECK: [[VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE9]]:_(s32), [[IMP_DEF4]]:_(s32) - CHECK: [[IMP_DEF5:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - - CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR3]]:_ - CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR4]]:_ - CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = 
G_AND [[VECTOR2]]:_, [[VECTOR5]]:_ - CHECK: [[IMP_DEF6:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - - CHECK: [[VECTOR6:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>) - CHECK: [[VECTOR7:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>) - CHECK: [[VECTOR8:%[0-9]+]]:_(<5 x s32>), [[VECTOR9:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[VECTOR7]]:_(<10 x s32>) + CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32) + CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32) + CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR2]]:_ + CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR3]]:_ + CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[VALUE4]]:_, [[VALUE9]]:_ + CHECK: [[AND0_E0:%[0-9]+]]:_(s32), [[AND0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND0]]:_(<2 x s32>) + CHECK: [[AND1_E0:%[0-9]+]]:_(s32), [[AND1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]]:_(<2 x s32>) + CHECK: [[RESULT:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[AND0_E0]]:_(s32), [[AND0_E1]]:_(s32), [[AND1_E0]]:_(s32), [[AND1_E1]]:_(s32), [[AND2]]:_(s32) )"; // Check @@ -1428,12 +1419,19 @@ auto CheckStr = R"( CHECK: [[BITCAST0:%[0-9]+]]:_(<2 x s32>) = G_BITCAST CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST - CHECK: [[IMP_DEF0:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CONCAT0:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>) - CHECK: [[IMP_DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CONCAT1:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>) - CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[CONCAT0]]:_, [[CONCAT1]]:_ - CHECK: (<2 x s32>) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>) + + CHECK: [[BITCAST0_E0:%[0-9]+]]:_(s32), [[BITCAST0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST0]]:_(<2 x s32>) + CHECK: [[IMP_DEF0:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST0_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST0_E0]]:_(s32), [[BITCAST0_E1]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32) + + CHECK: [[BITCAST1_E0:%[0-9]+]]:_(s32), [[BITCAST1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]]:_(<2 x s32>) + CHECK: [[IMP_DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST1_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST1_E0]]:_(s32), [[BITCAST1_E1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32) + + CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[BITCAST0_LARGE]]:_, [[BITCAST1_LARGE]]:_ + + CHECK: [[AND_E0:%[0-9]+]]:_(s32), [[AND_E1:%[0-9]+]]:_(s32), [[AND_E2:%[0-9]+]]:_(s32), [[AND_E3:%[0-9]+]]:_(s32), [[AND_E4:%[0-9]+]]:_(s32), [[AND_E5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>) + CHECK: (<2 x s32>) = G_BUILD_VECTOR [[AND_E0]]:_(s32), [[AND_E1]]:_(s32) )"; EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; @@ -1509,26 +1507,26 @@ auto CheckStr = R"( CHECK: [[INITVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - CHECK: [[EXTRACT0:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 0 - CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 64 - CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = 
G_EXTRACT [[INITVAL]]:_(<5 x s32>), 128 + CHECK: [[INITVAL_E0:%[0-9]+]]:_(s32), [[INITVAL_E1:%[0-9]+]]:_(s32), [[INITVAL_E2:%[0-9]+]]:_(s32), [[INITVAL_E3:%[0-9]+]]:_(s32), [[INITVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INITVAL]]:_(<5 x s32>) + CHECK: [[INITVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E0]]:_(s32), [[INITVAL_E1]]:_(s32) + CHECK: [[INITVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E2]]:_(s32), [[INITVAL_E3]]:_(s32) CHECK: G_BRCOND CHECK: [[MIDVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 0 - CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 64 - CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 128 + CHECK: [[MIDVAL_E0:%[0-9]+]]:_(s32), [[MIDVAL_E1:%[0-9]+]]:_(s32), [[MIDVAL_E2:%[0-9]+]]:_(s32), [[MIDVAL_E3:%[0-9]+]]:_(s32), [[MIDVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MIDVAL]]:_(<5 x s32>) + CHECK: [[MIDVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E0]]:_(s32), [[MIDVAL_E1]]:_(s32) + CHECK: [[MIDVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E2]]:_(s32), [[MIDVAL_E3]]:_(s32) CHECK: G_BR - CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT0]]:_(<2 x s32>), %bb.0, [[EXTRACT3]]:_(<2 x s32>), %bb.1 - CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT1]]:_(<2 x s32>), %bb.0, [[EXTRACT4]]:_(<2 x s32>), %bb.1 - CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1 - - CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI - + CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E01]]:_(<2 x s32>), %bb.0, [[MIDVAL_E01]]:_(<2 x s32>), %bb.1 + CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E23]]:_(<2 x s32>), %bb.0, [[MIDVAL_E23]]:_(<2 x s32>), %bb.1 + CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[INITVAL_E4]]:_(s32), %bb.0, [[MIDVAL_E4]]:_(s32), %bb.1 CHECK: [[UNMERGE0:%[0-9]+]]:_(s32), [[UNMERGE1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI0]]:_(<2 x s32>) CHECK: [[UNMERGE2:%[0-9]+]]:_(s32), [[UNMERGE3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI1]]:_(<2 x s32>) CHECK: [[BV:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UNMERGE0]]:_(s32), [[UNMERGE1]]:_(s32), [[UNMERGE2]]:_(s32), [[UNMERGE3]]:_(s32), [[PHI2]]:_(s32) + + CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI + CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[BV]]:_, [[BV]]:_ )"; @@ -3196,14 +3194,8 @@ CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_ CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[OR]] - CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[V2S32]] - CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[S32]] - CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT - CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]]:_, [[C]]:_(s64) - CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT - CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[BITCAST]]:_, [[C]]:_ - CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_ - CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[OR]] + CHECK: [[V2S32_E0:%[0-9]+]]:_(s32), [[V2S32_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[V2S32]] + CHECK: [[BV:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[V2S32_E0]]:_(s32), [[S32]]:_(s32) )"; // Check @@ -3711,20 +3703,22 @@ DefineLegalizerInfo(A, {}); // Make sure that G_FREEZE is narrowed using unmerge/extract - LLT S16{LLT::scalar(16)}; LLT S32{LLT::scalar(32)}; LLT S33{LLT::scalar(33)}; + LLT S48{LLT::scalar(48)}; LLT S64{LLT::scalar(64)}; LLT V2S16{LLT::fixed_vector(2, 16)}; - LLT V2S32{LLT::fixed_vector(2, 32)}; + LLT V3S16{LLT::fixed_vector(3, 16)}; + LLT 
V4S16{LLT::fixed_vector(4, 16)}; auto Trunc = B.buildTrunc(S33, {Copies[0]}); - auto Vector = B.buildBitcast(V2S32, Copies[0]); + auto Trunc1 = B.buildTrunc(S48, {Copies[0]}); + auto Vector = B.buildBitcast(V3S16, Trunc1); auto FreezeScalar = B.buildInstr(TargetOpcode::G_FREEZE, {S64}, {Copies[0]}); auto FreezeOdd = B.buildInstr(TargetOpcode::G_FREEZE, {S33}, {Trunc}); - auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); - auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); + auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector}); + auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector}); AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; @@ -3736,54 +3730,48 @@ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeScalar, 0, S32)); + // This should be followed by narrowScalar to S32. B.setInsertPt(*EntryMBB, FreezeOdd->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeOdd, 0, S32)); + Helper.widenScalar(*FreezeOdd, 0, S64)); B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeVector, 0, V2S16)); + Helper.fewerElementsVector(*FreezeVector, 0, V2S16)); + // This should be followed by fewerElements to V2S16. B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeVector1, 0, S16)); + Helper.moreElementsVector(*FreezeVector1, 0, V4S16)); const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]] - CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]] + CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]] CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]] CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[UV]] CHECK: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[UV1]] CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE]]:_(s32), [[FREEZE1]] - CHECK: (s1) = G_UNMERGE_VALUES [[TRUNC]]:_(s33) - CHECK: [[UNDEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES - CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES - CHECK: [[UNDEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[MV1]] - CHECK: [[FREEZE3:%[0-9]+]]:_(s32) = G_FREEZE [[MV2]] - CHECK: [[UNDEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[MV3:%[0-9]+]]:_(s1056) = G_MERGE_VALUES [[FREEZE2]]:_(s32), [[FREEZE3]]:_(s32), [[UNDEF2]] - CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[MV3]] - - CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] - CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]] - CHECK: [[FREEZE4:%[0-9]+]]:_(s32) = G_FREEZE [[UV2]] - CHECK: [[FREEZE5:%[0-9]+]]:_(s32) = G_FREEZE [[UV3]] - CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE4]]:_(s32), [[FREEZE5]]:_(s32) - CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV4]] - - CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] - CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST3]] - CHECK: [[FREEZE6:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]] - CHECK: [[FREEZE7:%[0-9]+]]:_(s16) = G_FREEZE [[UV5]] - CHECK: [[FREEZE8:%[0-9]+]]:_(s16) = G_FREEZE [[UV6]] - CHECK: [[FREEZE9:%[0-9]+]]:_(s16) = G_FREEZE 
[[UV7]] - CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE6]]:_(s16), [[FREEZE7]]:_(s16), [[FREEZE8]]:_(s16), [[FREEZE9]] - CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV5]] + CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]] + CHECK: [[FREEZE2:%[0-9]+]]:_(s64) = G_FREEZE [[ANYEXT]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[FREEZE2]] + + CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[BV:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]]:_(s16), [[UV3]]:_(s16) + CHECK: [[FREEZE3:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[BV]] + CHECK: [[FREEZE4:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]] + CHECK: [[FREEZE3_E0:%[0-9]+]]:_(s16), [[FREEZE3_E1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE3]] + CHECK: [[BV1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE3_E0]]:_(s16), [[FREEZE3_E1]]:_(s16), [[FREEZE4]]:_(s16) + + CHECK: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[IMP_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + CHECK: [[BV1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV5]]:_(s16), [[UV6]]:_(s16), [[UV7]]:_(s16), [[IMP_DEF]]:_(s16) + CHECK: [[FREEZE5:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[BV1]] + CHECK: [[FREEZE5_E0:%[0-9]+]]:_(s16), [[FREEZE5_E1:%[0-9]+]]:_(s16), [[FREEZE5_E2:%[0-9]+]]:_(s16), [[FREEZE5_E3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE5]] + CHECK: [[BV2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE5_E0]]:_(s16), [[FREEZE5_E1]]:_(s16), [[FREEZE5_E2]]:_(s16) )"; // Check @@ -3869,10 +3857,12 @@ const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] - CHECK: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CV:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST]]:_(<2 x s32>), [[UNDEF]] - CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[CV]] - CHECK: [[EXTR0:%[0-9]+]]:_(<2 x s32>), [[EXTR1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) + CHECK: [[BITCAST_E0:%[0-9]+]]:_(s32), [[BITCAST_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]]:_(<2 x s32>) + CHECK: [[IMP_DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST_LARGE:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST_E0]]:_(s32), [[BITCAST_E1]]:_(s32), [[IMP_DEF]]:_(s32), [[IMP_DEF]]:_(s32) + CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BITCAST_LARGE]] + CHECK: [[FREEZE_E0:%[0-9]+]]:_(s32), [[FREEZE_E1:%[0-9]+]]:_(s32), [[FREEZE_E2:%[0-9]+]]:_(s32), [[FREEZE_E3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) + CHECK: (<2 x s32>) = G_BUILD_VECTOR [[FREEZE_E0]]:_(s32), [[FREEZE_E1]]:_(s32) )"; // Check @@ -4040,14 +4030,19 @@ CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY CHECK: [[BV1:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR CHECK: [[BV2:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR - CHECK: [[IMPDEF1:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[INSERT1:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF1]]:_, [[BV1]]:_(<6 x s64>), 0 - CHECK: [[IMPDEF2:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[INSERT2:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF2]]:_, [[BV2]]:_(<6 x s64>), 0 - CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[INSERT1]]:_(<8 x s64>), [[INSERT2]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) - CHECK: [[IMPDEF3:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[CONCAT:%[0-9]+]]:_(<24 x s64>) = G_CONCAT_VECTORS [[SHUF]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>) - CHECK: [[UNMERGE:%[0-9]+]]:_(<6 x s64>), [[UNMERGE2:%[0-9]+]]:_(<6 x 
s64>), [[UNMERGE3:%[0-9]+]]:_(<6 x s64>), [[UNMERGE4:%[0-9]+]]:_(<6 x s64>) = G_UNMERGE_VALUES [[CONCAT]]:_(<24 x s64>) + + CHECK: [[BV1_E0:%[0-9]+]]:_(s64), [[BV1_E1:%[0-9]+]]:_(s64), [[BV1_E2:%[0-9]+]]:_(s64), [[BV1_E3:%[0-9]+]]:_(s64), [[BV1_E4:%[0-9]+]]:_(s64), [[BV1_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV1]]:_(<6 x s64>) + CHECK: [[IMP_DEF0:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + CHECK: [[BV1_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV1_E0]]:_(s64), [[BV1_E1]]:_(s64), [[BV1_E2]]:_(s64), [[BV1_E3]]:_(s64), [[BV1_E4]]:_(s64), [[BV1_E5]]:_(s64), [[IMP_DEF0]]:_(s64), [[IMP_DEF0]]:_(s64) + + CHECK: [[BV2_E0:%[0-9]+]]:_(s64), [[BV2_E1:%[0-9]+]]:_(s64), [[BV2_E2:%[0-9]+]]:_(s64), [[BV2_E3:%[0-9]+]]:_(s64), [[BV2_E4:%[0-9]+]]:_(s64), [[BV2_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV2]]:_(<6 x s64>) + CHECK: [[IMP_DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + CHECK: [[BV2_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV2_E0]]:_(s64), [[BV2_E1]]:_(s64), [[BV2_E2]]:_(s64), [[BV2_E3]]:_(s64), [[BV2_E4]]:_(s64), [[BV2_E5]]:_(s64), [[IMP_DEF1]]:_(s64), [[IMP_DEF1]]:_(s64) + + CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[BV1_LARGE]]:_(<8 x s64>), [[BV2_LARGE]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) + + CHECK: [[SHUF_E0:%[0-9]+]]:_(s64), [[SHUF_E1:%[0-9]+]]:_(s64), [[SHUF_E2:%[0-9]+]]:_(s64), [[SHUF_E3:%[0-9]+]]:_(s64), [[SHUF_E4:%[0-9]+]]:_(s64), [[SHUF_E5:%[0-9]+]]:_(s64), [[SHUF_E6:%[0-9]+]]:_(s64), [[SHUF_E7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]]:_(<8 x s64>) + CHECK: (<6 x s64>) = G_BUILD_VECTOR [[SHUF_E0]]:_(s64), [[SHUF_E1]]:_(s64), [[SHUF_E2]]:_(s64), [[SHUF_E3]]:_(s64), [[SHUF_E4]]:_(s64), [[SHUF_E5]]:_(s64) )"; // Check
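  // A minimal sketch (hypothetical, not part of this patch) of the kind of
  // rule a target could use to get the pad-then-split sequence verified by
  // the checks above; the opcode and types here are illustrative only:
  //
  //   getActionDefinitionsBuilder(TargetOpcode::G_SHUFFLE_VECTOR)
  //       .legalFor({LLT::fixed_vector(8, 64)})
  //       .clampMaxNumElementsStrict(0, LLT::scalar(64), 8);
  //
  // With such a rule, the <6 x s64> operands are padded with undef elements
  // up to <8 x s64> before the single legal shuffle is emitted, and the
  // original <6 x s64> result is rebuilt through the
  // G_UNMERGE_VALUES/G_BUILD_VECTOR pair checked above.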