Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -228,6 +228,119 @@
     MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
 }
 
+/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
+static MachineInstrBuilder
+mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+                            ArrayRef<Register> SrcRegs) {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  LLT LLTy = MRI.getType(DstRegs[0]);
+  LLT PartLLT = MRI.getType(SrcRegs[0]);
+
+  // Deal with v3s16 split into v2s16
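+  // (illustrative example, editor's note): DstRegs = { v3s16 } and
+  // SrcRegs = { v2s16, v2s16 } give LCMTy = v6s16, so we concatenate
+  // { src0, src1, undef } into a v6s16 and unmerge that into the real
+  // v3s16 result plus one dead v3s16 def.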
+  LLT LCMTy = getLCMType(LLTy, PartLLT);
+  if (LCMTy == LLTy) {
+    // Common case where no padding is needed.
+    assert(DstRegs.size() == 1);
+    return B.buildConcatVectors(DstRegs[0], SrcRegs);
+  }
+
+  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
+  Register Undef = B.buildUndef(PartLLT).getReg(0);
+
+  // Build vector of undefs.
+  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
+
+  // Replace the first sources with the real registers.
+  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+
+  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
+  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
+
+  SmallVector<Register, 8> PadDstRegs(NumDst);
+  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
+
+  // Create the excess dead defs for the unmerge.
+  for (int I = DstRegs.size(); I != NumDst; ++I)
+    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+
+  return B.buildUnmerge(PadDstRegs, Widened);
+}
+
+/// Create a sequence of instructions to combine pieces split into register
+/// typed values to the original IR value. \p OrigRegs contains the destination
+/// value registers of type \p LLTy, and \p Regs contains the legalized pieces
+/// with type \p PartLLT.
+static void buildCopyToParts(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
+                             ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+  MachineRegisterInfo &MRI = *B.getMRI();
+
+  if (!LLTy.isVector() && !PartLLT.isVector()) {
+    assert(OrigRegs.size() == 1);
+    LLT OrigTy = MRI.getType(OrigRegs[0]);
+
+    unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
+    if (SrcSize == OrigTy.getSizeInBits())
+      B.buildMerge(OrigRegs[0], Regs);
+    else {
+      auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
+      B.buildTrunc(OrigRegs[0], Widened);
+    }
+
+    return;
+  }
+
+  if (LLTy.isVector() && PartLLT.isVector()) {
+    assert(OrigRegs.size() == 1);
+    assert(LLTy.getElementType() == PartLLT.getElementType());
+    mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
+    return;
+  }
+
+  assert(LLTy.isVector() && !PartLLT.isVector());
+
+  LLT DstEltTy = LLTy.getElementType();
+
+  // Pointer information was discarded. We'll need to coerce some register
+  // types to avoid violating type constraints.
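+  // (illustrative example, editor's note): a <2 x p3> IR value may be tracked
+  // here with LLTy <2 x s32>; the 32-bit part registers are then retyped to
+  // p3 before the final G_BUILD_VECTOR so the operand types stay consistent.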
+  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();
+
+  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());
+
+  if (DstEltTy == PartLLT) {
+    // Vector was trivially scalarized.
+
+    if (RealDstEltTy.isPointer()) {
+      for (Register Reg : Regs)
+        MRI.setType(Reg, RealDstEltTy);
+    }
+
+    B.buildBuildVector(OrigRegs[0], Regs);
+  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+    // Deal with vector with 64-bit elements decomposed to 32-bit
+    // registers. Need to create intermediate 64-bit elements.
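+    // (illustrative example, editor's note): a <2 x s64> value passed in four
+    // s32 parts becomes two G_MERGE_VALUES, one s64 per element, feeding a
+    // single G_BUILD_VECTOR.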
+    SmallVector<Register, 8> EltMerges;
+    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+      auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
+      // Fix the type in case this is really a vector of pointers.
+      MRI.setType(Merge.getReg(0), RealDstEltTy);
+      EltMerges.push_back(Merge.getReg(0));
+      Regs = Regs.drop_front(PartsPerElt);
+    }
+
+    B.buildBuildVector(OrigRegs[0], EltMerges);
+  } else {
+    // Vector was split, and elements promoted to a wider type.
+    // FIXME: Should handle floating point promotions.
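+    // (illustrative example, editor's note): a <2 x s16> value passed in two
+    // s32 parts: build a <2 x s32> from the parts, then truncate it back to
+    // the original <2 x s16>.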
+    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
+    auto BV = B.buildBuildVector(BVType, Regs);
+    B.buildTrunc(OrigRegs[0], BV);
+  }
+}
+
 bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      SmallVectorImpl<ArgInfo> &Args,
                                      ValueHandler &Handler,
@@ -278,9 +391,6 @@
     }
     assert(NumParts > 1);
 
-    // For now only handle exact splits.
-    if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
-      return false;
 
     // For incoming arguments (physregs to vregs), we could have values in
     // physregs (or memlocs) which we want to extract and copy to vregs.
@@ -379,6 +489,7 @@
     EVT OrigVT = EVT::getEVT(Args[i].Ty);
     EVT VAVT = VA.getValVT();
     const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+    const LLT VATy(VAVT.getSimpleVT());
 
     // Expected to be multiple regs for a single incoming arg.
     // There should be Regs.size() ArgLocs per argument.
@@ -427,7 +538,6 @@
       }
 
       // This ArgLoc covers multiple pieces, so we need to split it.
-      const LLT VATy(VAVT.getSimpleVT());
       Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
       Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
@@ -451,12 +561,12 @@
     // Now that all pieces have been handled, re-pack any arguments into any
     // wider, original registers.
     if (Handler.isIncomingArgumentHandler()) {
-      if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) {
-        assert(NumArgRegs >= 2);
+      // Merge the split registers into the expected larger result vregs of
+      // the original call.
-        // Merge the split registers into the expected larger result vreg
-        // of the original call.
-        MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
+      if (OrigTy != VATy && !Args[i].OrigRegs.empty()) {
+        buildCopyToParts(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
+                         VATy);
       }
     }
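For illustration only (editor's note): a minimal sketch of what buildCopyToParts emits for the mixed vector/scalar case, assuming a MachineIRBuilder B, an original <2 x s64> vreg Orig, and four hypothetical s32 part registers P0-P3:

    // Rebuild a <2 x s64> original value from four 32-bit pieces.
    LLT S64 = LLT::scalar(64);
    auto Lo = B.buildMerge(S64, {P0, P1});   // G_MERGE_VALUES s64 = s32, s32
    auto Hi = B.buildMerge(S64, {P2, P3});
    // G_BUILD_VECTOR <2 x s64> from the two intermediate elements.
    B.buildBuildVector(Orig, {Lo.getReg(0), Hi.getReg(0)});
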
Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -735,117 +735,6 @@
   return true;
 }
 
-/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
-static MachineInstrBuilder mergeVectorRegsToResultRegs(
-    MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
-  MachineRegisterInfo &MRI = *B.getMRI();
-  LLT LLTy = MRI.getType(DstRegs[0]);
-  LLT PartLLT = MRI.getType(SrcRegs[0]);
-
-  // Deal with v3s16 split into v2s16
-  LLT LCMTy = getLCMType(LLTy, PartLLT);
-  if (LCMTy == LLTy) {
-    // Common case where no padding is needed.
-    assert(DstRegs.size() == 1);
-    return B.buildConcatVectors(DstRegs[0], SrcRegs);
-  }
-
-  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
-  Register Undef = B.buildUndef(PartLLT).getReg(0);
-
-  // Build vector of undefs.
-  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
-  // Replace the first sources with the real registers.
-  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
-
-  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
-  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
-
-  SmallVector<Register, 8> PadDstRegs(NumDst);
-  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
-
-  // Create the excess dead defs for the unmerge.
-  for (int I = DstRegs.size(); I != NumDst; ++I)
-    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
-
-  return B.buildUnmerge(PadDstRegs, Widened);
-}
-
-// TODO: Move this to generic code
-static void packSplitRegsToOrigType(MachineIRBuilder &B,
-                                    ArrayRef<Register> OrigRegs,
-                                    ArrayRef<Register> Regs,
-                                    LLT LLTy,
-                                    LLT PartLLT) {
-  MachineRegisterInfo &MRI = *B.getMRI();
-
-  if (!LLTy.isVector() && !PartLLT.isVector()) {
-    assert(OrigRegs.size() == 1);
-    LLT OrigTy = MRI.getType(OrigRegs[0]);
-
-    unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
-    if (SrcSize == OrigTy.getSizeInBits())
-      B.buildMerge(OrigRegs[0], Regs);
-    else {
-      auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
-      B.buildTrunc(OrigRegs[0], Widened);
-    }
-
-    return;
-  }
-
-  if (LLTy.isVector() && PartLLT.isVector()) {
-    assert(OrigRegs.size() == 1);
-    assert(LLTy.getElementType() == PartLLT.getElementType());
-    mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
-    return;
-  }
-
-  assert(LLTy.isVector() && !PartLLT.isVector());
-
-  LLT DstEltTy = LLTy.getElementType();
-
-  // Pointer information was discarded. We'll need to coerce some register
-  // types to avoid violating type constraints.
-  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();
-
-  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());
-
-  if (DstEltTy == PartLLT) {
-    // Vector was trivially scalarized.
-
-    if (RealDstEltTy.isPointer()) {
-      for (Register Reg : Regs)
-        MRI.setType(Reg, RealDstEltTy);
-    }
-
-    B.buildBuildVector(OrigRegs[0], Regs);
-  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
-    // Deal with vector with 64-bit elements decomposed to 32-bit
-    // registers. Need to create intermediate 64-bit elements.
-    SmallVector<Register, 8> EltMerges;
-    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
-
-    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
-
-    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
-      auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
-      // Fix the type in case this is really a vector of pointers.
-      MRI.setType(Merge.getReg(0), RealDstEltTy);
-      EltMerges.push_back(Merge.getReg(0));
-      Regs = Regs.drop_front(PartsPerElt);
-    }
-
-    B.buildBuildVector(OrigRegs[0], EltMerges);
-  } else {
-    // Vector was split, and elements promoted to a wider type.
-    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
-    auto BV = B.buildBuildVector(BVType, Regs);
-    B.buildTrunc(OrigRegs[0], BV);
-  }
-}
-
 bool AMDGPUCallLowering::lowerFormalArguments(
     MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
     FunctionLoweringInfo &FLI) const {
@@ -886,7 +775,6 @@
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
-  SmallVector<ArgInfo, 8> SplitArg;
   SmallVector<ArgInfo, 32> SplitArgs;
   unsigned Idx = 0;
   unsigned PSInputNum = 0;
@@ -936,19 +824,7 @@
     const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
     setArgFlags(OrigArg, OrigArgIdx, DL, F);
 
-    SplitArg.clear();
-    splitToValueTypes(B, OrigArg, SplitArg, DL, CC);
-
-    processSplitArgs(B, OrigArg, SplitArg, SplitArgs, DL, CC, false,
-                     // FIXME: We should probably be passing multiple registers
-                     // to handleAssignments to do this
-                     [&](ArrayRef<Register> Regs, Register DstReg, LLT LLTy,
-                         LLT PartLLT, int VTSplitIdx) {
-                       assert(DstReg == VRegs[Idx][VTSplitIdx]);
-                       packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
-                                               LLTy, PartLLT);
-                     });
-
+    splitToValueTypes(B, OrigArg, SplitArgs, DL, CC);
     ++Idx;
   }
 
@@ -1356,19 +1232,7 @@
     insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                     Info.DemoteRegister, Info.DemoteStackIndex);
   } else if (!Info.OrigRet.Ty->isVoidTy()) {
-    SmallVector<ArgInfo, 8> PreSplitRetInfos;
-
-    splitToValueTypes(
-        MIRBuilder, Info.OrigRet, PreSplitRetInfos/*InArgs*/, DL, Info.CallConv);
-
-    processSplitArgs(MIRBuilder, Info.OrigRet,
-                     PreSplitRetInfos, InArgs/*SplitRetInfos*/, DL, Info.CallConv, false,
-                     [&](ArrayRef<Register> Regs, Register DstReg,
-                         LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-                       assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
-                       packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
-                                               Regs, LLTy, PartLLT);
-                     });
+    splitToValueTypes(MIRBuilder, Info.OrigRet, InArgs, DL, Info.CallConv);
   }
 
   // Make sure the raw argument copies are inserted before the marshalling to
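A rough sketch of the target-side pattern after this change (editor's note; names taken from the AMDGPU code above):

    // Incoming return values: produce one ArgInfo per legalized value type.
    SmallVector<ArgInfo, 8> InArgs;
    splitToValueTypes(MIRBuilder, Info.OrigRet, InArgs, DL, Info.CallConv);
    // handleAssignments assigns each piece and, when OrigTy != VATy, calls
    // buildCopyToParts itself to rebuild the original wide/vector vregs, so
    // no per-target repacking callback is needed.
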
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -285,15 +285,8 @@
 define amdgpu_ps i32 @s_andn2_v2i16(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
 ; GFX6-LABEL: s_andn2_v2i16:
 ; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s1, 0xffff
-; GFX6-NEXT: s_and_b32 s2, s2, s1
-; GFX6-NEXT: s_lshl_b32 s0, s3, 16
-; GFX6-NEXT: s_or_b32 s0, s0, s2
-; GFX6-NEXT: s_lshl_b32 s2, s5, 16
-; GFX6-NEXT: s_and_b32 s1, s4, s1
-; GFX6-NEXT: s_or_b32 s1, s2, s1
-; GFX6-NEXT: s_xor_b32 s1, s1, -1
-; GFX6-NEXT: s_and_b32 s0, s0, s1
+; GFX6-NEXT: s_xor_b32 s0, s3, -1
+; GFX6-NEXT: s_and_b32 s0, s2, s0
 ; GFX6-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_andn2_v2i16:
@@ -309,15 +302,8 @@
 define amdgpu_ps i32 @s_andn2_v2i16_commute(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
 ; GFX6-LABEL: s_andn2_v2i16_commute:
 ; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s1, 0xffff
-; GFX6-NEXT: s_and_b32 s2, s2, s1
-; GFX6-NEXT: s_lshl_b32 s0, s3, 16
-; GFX6-NEXT: s_or_b32 s0, s0, s2
-; GFX6-NEXT: s_lshl_b32 s2, s5, 16
-; GFX6-NEXT: s_and_b32 s1, s4, s1
-; GFX6-NEXT: s_or_b32 s1, s2, s1
-; GFX6-NEXT: s_xor_b32 s1, s1, -1
-; GFX6-NEXT: s_and_b32 s0, s1, s0
+; GFX6-NEXT: s_xor_b32 s0, s3, -1
+; GFX6-NEXT: s_and_b32 s0, s0, s2
 ; GFX6-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_andn2_v2i16_commute:
@@ -333,15 +319,8 @@
 define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
 ; GFX6-LABEL: s_andn2_v2i16_multi_use:
 ; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s1, 0xffff
-; GFX6-NEXT: s_and_b32 s2, s2, s1
-; GFX6-NEXT: s_lshl_b32 s0, s3, 16
-; GFX6-NEXT: s_or_b32 s0, s0, s2
-; GFX6-NEXT: s_lshl_b32 s2, s5, 16
-; GFX6-NEXT: s_and_b32 s1, s4, s1
-; GFX6-NEXT: s_or_b32 s1, s2, s1
-; GFX6-NEXT: s_xor_b32 s1, s1, -1
-; GFX6-NEXT: s_and_b32 s0, s0, s1
+; GFX6-NEXT: s_xor_b32 s1, s3, -1
+; GFX6-NEXT: s_and_b32 s0, s2, s1
 ; GFX6-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_andn2_v2i16_multi_use:
@@ -362,19 +341,9 @@
 define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_foldable_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1, <2 x i16> inreg %src2) {
 ; GFX6-LABEL: s_andn2_v2i16_multi_foldable_use:
 ; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s1, 0xffff
-; GFX6-NEXT: s_lshl_b32 s0, s3, 16
-; GFX6-NEXT: s_and_b32 s2, s2, s1
-; GFX6-NEXT: s_or_b32 s0, s0, s2
-; GFX6-NEXT: s_and_b32 s3, s4, s1
-; GFX6-NEXT: s_lshl_b32 s2, s5, 16
-; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_lshl_b32 s3, s7, 16
-; GFX6-NEXT: s_and_b32 s1, s6, s1
-; GFX6-NEXT: s_or_b32 s1, s3, s1
-; GFX6-NEXT: s_xor_b32 s1, s1, -1
-; GFX6-NEXT: s_and_b32 s0, s0, s1
-; GFX6-NEXT: s_and_b32 s1, s2, s1
+; GFX6-NEXT: s_xor_b32 s1, s4, -1
+; GFX6-NEXT: s_and_b32 s0, s2, s1
+; GFX6-NEXT: s_and_b32 s1, s3, s1
 ; GFX6-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_andn2_v2i16_multi_foldable_use:
@@ -397,13 +366,6 @@
 ; GFX6-LABEL: v_andn2_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_and_b32_e32 v0, v0, v4
-; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT: v_and_b32_e32 v2, v2, v4
-; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v1
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -473,13 +473,13 @@
 ; GFX6-LABEL: v_ashr_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX6-NEXT: v_ashrrev_i32_e32 v0, v2, v0
-; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
-; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
-; GFX6-NEXT: v_ashrrev_i32_e32 v1, v2, v1
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0
+; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 16
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, v3, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_ashr_v2i16:
@@ -503,6 +503,7 @@
 ; GFX6-LABEL: v_ashr_v2i16_15:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0
@@ -531,12 +532,13 @@
 ; GFX6-LABEL: s_ashr_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: s_and_b32 s2, s2, s4
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
+; GFX6-NEXT: s_lshr_b32 s3, s1, 16
+; GFX6-NEXT: s_and_b32 s1, s1, s4
 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
-; GFX6-NEXT: s_ashr_i32 s0, s0, s2
-; GFX6-NEXT: s_and_b32 s2, s3, s4
-; GFX6-NEXT: s_sext_i32_i16 s1, s1
-; GFX6-NEXT: s_ashr_i32 s1, s1, s2
+; GFX6-NEXT: s_ashr_i32 s0, s0, s1
+; GFX6-NEXT: s_sext_i32_i16 s1, s2
+; GFX6-NEXT: s_ashr_i32 s1, s1, s3
 ; GFX6-NEXT: s_and_b32 s1, s1, s4
 ; GFX6-NEXT: s_and_b32 s0, s0, s4
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
@@ -577,10 +579,11 @@
 ; GFX6-LABEL: ashr_v2i16_sv:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
-; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
 ; GFX6-NEXT: s_sext_i32_i16 s0, s1
 ; GFX6-NEXT: v_ashr_i32_e32 v1, s0, v1
 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
@@ -610,13 +613,14 @@
 define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
 ; GFX6-LABEL: ashr_v2i16_vs:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
+; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, s1, v1
 ; GFX6-NEXT: s_and_b32 s0, s0, s2
 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
-; GFX6-NEXT: s_and_b32 s0, s1, s2
-; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
-; GFX6-NEXT: v_ashrrev_i32_e32 v1, s0, v1
 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
@@ -369,19 +369,18 @@
 define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
 ; GFX7-LABEL: s_bswap_v2i16:
 ; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s3, 0xffff
+; GFX7-NEXT: s_lshr_b32 s1, s0, 16
+; GFX7-NEXT: s_and_b32 s3, s0, 0xffff
 ; GFX7-NEXT: s_lshl_b32 s2, s0, 8
-; GFX7-NEXT: s_and_b32 s0, s0, s3
-; GFX7-NEXT: s_lshr_b32 s0, s0, 8
-; GFX7-NEXT: s_or_b32 s0, s0, s2
-; GFX7-NEXT: s_lshl_b32 s2, s1, 8
-; GFX7-NEXT: s_and_b32 s1, s1, s3
-; GFX7-NEXT: s_lshr_b32 s1, s1, 8
-; GFX7-NEXT: s_or_b32 s1, s1, s2
-; GFX7-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX7-NEXT: s_lshl_b32 s1, s1, 16
+; GFX7-NEXT: s_lshl_b32 s1, s1, 8
+; GFX7-NEXT: s_lshr_b32 s0, s0, 24
 ; GFX7-NEXT: s_or_b32 s0, s0, s1
+; GFX7-NEXT: s_lshr_b32 s3, s3, 8
+; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX7-NEXT: s_or_b32 s2, s3, s2
+; GFX7-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX7-NEXT: s_lshl_b32 s0, s0, 16
+; GFX7-NEXT: s_or_b32 s0, s1, s0
 ; GFX7-NEXT: ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_bswap_v2i16:
@@ -468,15 +467,15 @@
 ; GFX7-LABEL: v_bswap_v2i16:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s4, 0xffff
+; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v0
-; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v1
-; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
-; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v3, 8, v3
+; GFX7-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
+; GFX7-NEXT: v_or_b32_e32 v1, v0, v1
+; GFX7-NEXT: v_mov_b32_e32 v0, v2
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_bswap_v2i16:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
@@ -67,22 +67,26 @@
 define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 {
 ; CHECK-LABEL: name: halfinsts_add_v2i16
 ; CHECK: bb.1 (%ir-block.0):
-; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY4]]
-; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[COPY6]]
-; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
-; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
-; CHECK: $vgpr0 = COPY [[COPY7]](s32)
-; CHECK: $vgpr1 = COPY [[COPY8]](s32)
-; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-; CHECK: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1
+; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]]
+; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]]
+; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
+; CHECK: $vgpr0 = COPY [[COPY6]](s32)
+; CHECK: $vgpr1 = COPY [[COPY7]](s32)
+; CHECK: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+; CHECK: S_SETPC_B64_return [[COPY8]], implicit $vgpr0, implicit $vgpr1
   %add = add <2 x i16> %arg0, %arg0
   ret <2 x i16> %add
 }
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -406,44 +406,48 @@
 ; GFX6-IEEE-LABEL: v_fdiv_v2f16:
 ; GFX6-IEEE: ; %bb.0:
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4
-; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0
-; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v4, v5, v7
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v3, v2
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v4, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1
+; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v6
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v6, v1, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v5, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v3, v5
+; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v1, v6
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX6-FLUSH-LABEL: v_fdiv_v2f16:
 ; GFX6-FLUSH: ; %bb.0:
 ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v5, v4
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-FLUSH-NEXT: v_fma_f32 v5, v7, v5, v5
@@ -452,25 +456,25 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3
 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v5
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v5, v1, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v4, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v4, v3
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v4
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v4
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v5
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -525,15 +529,17 @@
 ; GFX6-LABEL: v_fdiv_v2f16_afn:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-NEXT: v_rcp_f32_e32 v2, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-NEXT: v_rcp_f32_e32 v3, v3
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_rcp_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -567,44 +573,48 @@
 ; GFX6-IEEE-LABEL: v_fdiv_v2f16_ulp25:
 ; GFX6-IEEE: ; %bb.0:
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4
-; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0
-; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v4, v5, v7
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v3, v2
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v4, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1
+; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v6
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v6, v1, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v5, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v3, v5
+; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v1, v6
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX6-FLUSH-LABEL: v_fdiv_v2f16_ulp25:
 ; GFX6-FLUSH: ; %bb.0:
 ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v5, v4
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-FLUSH-NEXT: v_fma_f32 v5, v7, v5, v5
@@ -613,25 +623,25 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3
 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v5
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v5, v1, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v4, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v4, v3
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v4
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v4
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v5
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -686,32 +696,33 @@
 ; GFX6-IEEE-LABEL: v_rcp_v2f16:
 ; GFX6-IEEE: ; %bb.0:
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1
 ; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1
+; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -719,11 +730,12 @@
 ; GFX6-FLUSH: ; %bb.0:
 ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-NEXT: s_movk_i32 s6, 0x3c00
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, s6
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, s6
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1
 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
@@ -733,24 +745,24 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v1, v4
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -801,32 +813,33 @@
 ; GFX6-IEEE-LABEL: v_rcp_v2f16_arcp:
 ; GFX6-IEEE: ; %bb.0:
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1
 ; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1
+; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -834,11 +847,12 @@
 ; GFX6-FLUSH: ; %bb.0:
 ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-NEXT: s_movk_i32 s6, 0x3c00
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, s6
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, s6
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1
 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
@@ -848,24 +862,24 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v1, v4
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -916,14 +930,15 @@
 ; GFX6-LABEL: v_rcp_v2f16_arcp_afn:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, 1.0
-; GFX6-NEXT: v_rcp_f32_e32 v0, v0
 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX6-NEXT: v_rcp_f32_e32 v3, v0
+; GFX6-NEXT: v_mul_f32_e32 v0, v2, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_f32_e32 v1, v2, v3
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -953,32 +968,33 @@
 ; GFX6-IEEE-LABEL: v_rcp_v2f16_ulp25:
 ; GFX6-IEEE: ; %bb.0:
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1
 ; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1
+; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -986,11 +1002,12 @@
 ; GFX6-FLUSH: ; %bb.0:
 ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-NEXT: s_movk_i32 s6, 0x3c00
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, s6
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, s6
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1
 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
@@ -1000,24 +1017,24 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v1, v4
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -1047,15 +1064,17 @@
 ; GFX6-LABEL: v_fdiv_v2f16_afn_ulp25:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-NEXT: v_rcp_f32_e32 v2, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-NEXT: v_rcp_f32_e32 v3, v3
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_rcp_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -1089,44 +1108,48 @@
 ; GFX6-IEEE-LABEL: v_fdiv_v2f16_arcp_ulp25:
 ; GFX6-IEEE: ; %bb.0:
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4
-; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0
+; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0
-; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v4, v5, v7
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v3, v2
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v4, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v5
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1
+; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v6
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v6, v1, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v5, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v3, v5
+; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v1, v6
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX6-FLUSH-LABEL: v_fdiv_v2f16_arcp_ulp25:
 ; GFX6-FLUSH: ; %bb.0:
 ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v5, v4
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-FLUSH-NEXT: v_fma_f32 v5, v7, v5, v5
@@ -1135,25 +1158,25 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3
 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v5
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v5, v1, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v4, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v4, v3
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v4
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v4
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v5
 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -1208,15 +1231,17 @@
 ; GFX6-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-NEXT: v_rcp_f32_e32 v2, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-NEXT: v_rcp_f32_e32 v3, v3
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_rcp_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4 +; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5 +; GFX6-NEXT: v_fma_f32 v1, v3, v4, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -208,27 +209,21 @@ ; GFX6-LABEL: v_fma_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v6, 0xffff -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, v0, v6 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, v2, v6 ; GFX6-NEXT: s_mov_b32 s4, 0x80008000 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0 ; GFX6-NEXT: v_xor_b32_e32 v1, s4, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX6-NEXT: v_fma_f32 v0, v0, v1, v4 +; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_fma_f32 v1, v2, v3, v5 +; GFX6-NEXT: v_fma_f32 v1, v3, v4, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -109,15 +109,17 @@ ; GFX6-LABEL: v_pow_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_log_f32_e32 v1, v1 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 -; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_log_f32_e32 v2, v2 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 @@ -169,23 +171,21 @@ ; GFX6-LABEL: v_pow_v2f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX6-NEXT: v_log_f32_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_log_f32_e32 v2, v2 +; GFX6-NEXT: 
v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3 -; GFX6-NEXT: v_exp_f32_e32 v2, v0 -; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v1 -; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_v2f16_fneg_lhs: @@ -237,20 +237,18 @@ ; GFX6-LABEL: v_pow_v2f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX6-NEXT: v_log_f32_e32 v1, v1 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_log_f32_e32 v2, v2 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 @@ -305,15 +303,8 @@ ; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, v0, v4 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-NEXT: s_mov_b32 s4, 0x80008000 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 ; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -1194,9 +1194,9 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll @@ -14,8 +14,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32) @@ -37,8 +37,8 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32) @@ -66,9 +66,9 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) @@ -90,9 +90,9 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) @@ -120,10 +120,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY 
[[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) @@ -149,10 +149,10 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) @@ -184,9 +184,9 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) @@ -208,9 +208,9 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) @@ -238,10 +238,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY 
[[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) @@ -267,10 +267,10 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) @@ -302,10 +302,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) @@ -331,10 +331,10 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) @@ -366,13 +366,13 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) @@ -389,13 +389,13 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) @@ -418,14 +418,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -442,14 +442,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY 
$vgpr3 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -472,15 +472,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -501,15 +501,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -536,14 +536,14 @@ ; 
GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -560,14 +560,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -590,15 +590,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -619,15 +619,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -654,15 +654,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -683,15 +683,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -2884,8 +2884,8 @@ ; GPRIDX-NEXT: s_mov_b32 s28, s30 ; GPRIDX-NEXT: s_mov_b32 s29, s31 ; GPRIDX-NEXT: s_mov_b32 s31, s33 -; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 ; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 @@ -2956,8 +2956,8 @@ ; MOVREL-NEXT: s_mov_b32 s28, s30 ; MOVREL-NEXT: s_mov_b32 s29, s31 ; MOVREL-NEXT: s_mov_b32 s31, s33 -; MOVREL-NEXT: v_mov_b32_e32 v32, v0 ; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: v_mov_b32_e32 v32, v0 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: s_mov_b32 m0, s34 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -131,8 +131,8 @@ ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 @@ -838,9 +838,9 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store 6 into `i48 addrspace(1)* undef`, align 8, addrspace 1) ; GCN: 
S_ENDPGM 0 %val = call i48 @external_i48_func_void() @@ -896,9 +896,9 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) ; GCN: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 @@ -956,9 +956,9 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) ; GCN: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 @@ -1016,8 +1016,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i64 @external_i64_func_void() @@ -1073,8 +1073,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store 8 into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i8 addrspace(1)* @external_p1_func_void() @@ -1132,10 +1132,10 @@ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY 
$vgpr3 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store 16 into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x i8 addrspace(1)*> @external_v2p1_func_void() @@ -1246,8 +1246,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store 8 into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3) ; GCN: S_ENDPGM 0 %val = call <2 x i8 addrspace(3)*> @external_v2p3_func_void() @@ -1414,8 +1414,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store 8 into `double addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call double @external_f64_func_void() @@ -1473,10 +1473,10 @@ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x double> @external_v2f64_func_void() @@ -1532,8 +1532,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) + ; GCN: 
ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x i32> @external_v2i32_func_void() @@ -1590,8 +1590,8 @@ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store 12 into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x i32> @external_v3i32_func_void() @@ -1649,8 +1649,8 @@ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <4 x i32> @external_v4i32_func_void() @@ -1709,8 +1709,8 @@ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store 20 into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <5 x i32> @external_v5i32_func_void() @@ -1772,8 +1772,8 @@ ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <8 x i32> @external_v8i32_func_void() @@ -1843,8 +1843,8 @@ ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <16 x i32> @external_v16i32_func_void() @@ -1930,8 +1930,8 @@ ; GCN: 
[[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <32 x i32> @external_v32i32_func_void() @@ -2042,10 +2042,10 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() @@ -2101,8 +2101,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <4 x i16> @external_v4i16_func_void() @@ -2213,10 +2213,10 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; 
GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() @@ -2272,8 +2272,8 @@ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <4 x half> @external_v4f16_func_void() @@ -2330,8 +2330,8 @@ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x float> @external_v3f32_func_void() @@ -2390,8 +2390,8 @@ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store 20 into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <5 x float> @external_v5f32_func_void() @@ -2450,8 +2450,8 @@ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 @@ -2478,8 +2478,8 @@ ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into 
`i32 addrspace(1)* undef`, addrspace 1)
 ; GCN: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1)
 ; GCN: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
@@ -82,11 +82,11 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>)
 ; CHECK: $vgpr0 = COPY [[UV]](s32)
@@ -103,11 +103,11 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>)
 ; CHECK: $vgpr0 = COPY [[UV]](s32)
@@ -124,11 +124,11 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>)
 ; CHECK: $vgpr0 = COPY [[UV]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -308,9 +308,9 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store 6 into `i48 addrspace(1)* undef`, align 8, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -325,9 +325,9 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
@@ -347,9 +347,9 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
@@ -369,8 +369,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -386,9 +386,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store 12 into `i95 addrspace(1)* undef`, align 8, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -404,9 +404,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
@@ -427,9 +427,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
@@ -450,8 +450,8 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[MV]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -466,8 +466,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[MV]](p0), [[DEF]](p1) :: (store 8 into `i8* addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -482,8 +482,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[MV]](p1), [[DEF]](p1) :: (store 8 into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -527,8 +527,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `double addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -543,8 +543,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -559,9 +559,9 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store 6 into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -577,9 +577,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store 9 into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -596,9 +596,9 @@
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store 2 into `<2 x i8> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -617,9 +617,9 @@
 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
 ; CHECK: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store 3 into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -640,9 +640,9 @@
 ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store 4 into `<4 x i8> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -657,8 +657,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store 8 into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -674,8 +674,8 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -692,8 +692,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -711,8 +711,8 @@
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+ ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store 20 into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
 ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
@@ -733,8 +733,8 @@
 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
@@ -763,8 +763,8 @@
 ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
@@ -809,8 +809,8 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
@@ -858,8 +858,8 @@
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store 132 into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1)
 ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
@@ -876,10 +876,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -896,10 +896,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store 16 into `<2 x i8*> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -916,10 +916,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -938,11 +938,11 @@
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1)
 ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
@@ -963,12 +963,12 @@
 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x i64> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
@@ -991,13 +991,13 @@
 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
 ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
 ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)
+ ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store 40 into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1)
 ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
@@ -1026,7 +1026,6 @@
 ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
@@ -1036,6 +1035,7 @@
 ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
 ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x i64> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
@@ -1080,7 +1080,6 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
@@ -1098,6 +1097,7 @@
 ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
 ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x i64> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
@@ -1126,10 +1126,10 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
 ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -1144,8 +1144,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -1161,10 +1161,10 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
 ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -1181,8 +1181,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x i16> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -1203,8 +1203,8 @@
 ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>)
+ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x i16> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
@@ -1253,10 +1253,10 @@
 ; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
 ; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store 130 into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
 ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
@@ -1271,8 +1271,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -1288,8 +1288,8 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
@@ -1306,8 +1306,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -1328,8 +1328,8 @@
 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
@@ -1358,8 +1358,8 @@
 ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
@@ -1376,10 +1376,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -1398,11 +1398,11 @@
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1)
 ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
@@ -1423,12 +1423,12 @@
 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x double> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
@@ -1457,7 +1457,6 @@
 ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
@@ -1467,6 +1466,7 @@
 ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
 ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x double> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
@@ -1511,7 +1511,6 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
 ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
@@ -1529,6 +1528,7 @@
 ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
 ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x double> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
@@ -1557,10 +1557,10 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
 ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -1575,8 +1575,8 @@
 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
@@ -1593,8 +1593,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x half> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
@@ -1615,8 +1615,8 @@
 ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>)
+ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x half> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
@@ -1633,9 +1633,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
@@ -1803,15 +1803,15 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5)
 ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -1863,6 +1863,7 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
 ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
@@ -1872,7 +1873,6 @@
 ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5)
 ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -1929,12 +1929,12 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
 ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -1985,18 +1985,18 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -2047,12 +2047,12 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
 ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -2103,6 +2103,7 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
@@ -2111,6 +2112,9 @@
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
 ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
@@ -2119,14 +2123,10 @@
 ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
- ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
 ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -2177,6 +2177,7 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
@@ -2185,6 +2186,7 @@
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
 ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
@@ -2193,10 +2195,8 @@
 ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -2247,6 +2247,7 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
@@ -2263,6 +2264,7 @@
 ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
 ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
@@ -2279,10 +2281,8 @@
 ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -2333,6 +2333,7 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.31, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
@@ -2365,6 +2366,7 @@
 ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.17, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
 ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.16, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
 ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
@@ -2397,10 +2399,8 @@
 ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
- ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@@ -2423,9 +2423,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
@@ -2457,9 +2457,9 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
@@ -2520,9 +2520,9 @@
 ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
 ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
- ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16)
 ; CHECK: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>)
+ ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
@@ -2568,6 +2568,7 @@
 ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
 ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 2 from %fixed-stack.15, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
@@
-2600,10 +2601,9 @@ ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD]](s16), [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16) ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -10,17 +10,17 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] @@ -44,11 +44,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(p3) 
= COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] @@ -72,13 +72,13 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) @@ -105,13 +105,13 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64) ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -139,11 +139,11 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), 
[[COPY4]](s32) ; CHECK: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -171,17 +171,17 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -59,8 +59,8 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -9,9 +9,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) @@ -27,9 +27,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = 
G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] @@ -44,9 +44,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) @@ -63,8 +63,8 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) @@ -81,8 +81,8 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] @@ -98,8 +98,8 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) @@ -115,9 +115,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store 1 into 
%ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) @@ -133,9 +133,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] @@ -150,9 +150,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) @@ -168,10 +168,10 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store 1 into %ir.dst, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] @@ -186,10 +186,10 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst, addrspace 1) @@ -205,10 +205,10 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst, addrspace 1) Index: 
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -7,11 +7,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -28,9 +28,9 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -47,10 +47,10 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -67,10 +67,10 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -88,8 +88,8 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) ; 
CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -42,11 +42,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -64,11 +64,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -121,11 +121,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -143,11 +143,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: 
[[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -200,11 +200,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -222,11 +222,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -279,11 +279,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]] ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -301,11 +301,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -358,11 +358,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -380,11 +380,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -437,11 +437,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -459,11 +459,11 @@ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -14,10 +14,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -32,10 +32,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -57,10 +57,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -75,10 +75,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -100,10 +100,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -118,10 +118,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -143,10 +143,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x 
s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -161,10 +161,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -187,10 +187,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -205,10 +205,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -230,10 +230,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -248,10 +248,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -273,10 +273,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -291,10 +291,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -316,10 +316,10 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -334,10 +334,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit 
 $vgpr0
@@ -359,10 +359,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -377,10 +377,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -402,10 +402,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -420,10 +420,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -445,10 +445,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -463,10 +463,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -488,10 +488,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -506,10 +506,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -531,11 +531,11 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
@@ -551,11 +551,11 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
@@ -578,10 +578,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
@@ -599,10 +599,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
@@ -627,11 +627,11 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -653,11 +653,11 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -686,11 +686,11 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -712,11 +712,11 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -745,10 +745,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
@@ -766,10 +766,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
@@ -794,11 +794,11 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -820,11 +820,11 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -853,11 +853,11 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -879,11 +879,11 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
@@ -912,12 +912,12 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
@@ -939,12 +939,12 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
@@ -973,10 +973,10 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -991,10 +991,10 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource")
 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -1016,11 +1016,11 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
@@ -1039,11 +1039,11 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
@@ -1069,12 +1069,12 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
@@ -1097,12 +1097,12 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
@@ -1132,13 +1132,13 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
@@ -1161,13 +1161,13 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
@@ -14,8 +14,8 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -38,8 +38,8 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -69,8 +69,8 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -98,8 +98,8 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -135,9 +135,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -171,9 +171,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -216,9 +216,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -252,9 +252,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -297,8 +297,8 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -326,8 +326,8 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -363,9 +363,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -399,9 +399,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -444,9 +444,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -480,9 +480,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -525,9 +525,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -563,9 +563,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -611,8 +611,8 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -640,8 +640,8 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -677,9 +677,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -713,9 +713,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -758,9 +758,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -796,9 +796,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -844,9 +844,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -882,9 +882,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -930,9 +930,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -966,9 +966,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1011,9 +1011,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1049,9 +1049,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1097,13 +1097,13 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1121,13 +1121,13 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1152,13 +1152,13 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1181,13 +1181,13 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1218,14 +1218,14 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1254,14 +1254,14 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1299,14 +1299,14 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1335,14 +1335,14 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1380,13 +1380,13 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1409,13 +1409,13 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1446,14 +1446,14 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1482,14 +1482,14 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -1527,14 +1527,14 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32),
[[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1563,14 +1563,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1608,14 +1608,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1646,14 +1646,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1694,13 +1694,13 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1723,13 +1723,13 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: 
[[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1760,14 +1760,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1796,14 +1796,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1841,14 +1841,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), 
[[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1879,14 +1879,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1927,14 +1927,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -1965,14 +1965,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2013,14 +2013,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2049,14 +2049,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2094,14 +2094,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2132,14 +2132,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2180,8 +2180,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2204,8 +2204,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2235,8 +2235,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2259,8 +2259,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2290,8 +2290,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2314,8 +2314,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2345,8 +2345,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2369,8 +2369,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2400,8 +2400,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2424,8 +2424,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2455,8 +2455,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2479,8 +2479,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2510,8 +2510,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2534,8 +2534,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2565,8 +2565,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2589,8 +2589,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2620,8 +2620,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2640,8 +2640,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9
-  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
   ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2667,8 +2667,8 @@
   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
   ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2689,8 +2689,8 @@
   ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
   ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2718,9 +2718,9 @@
   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
   ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2738,9 +2738,9 @@
   ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
   ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2765,11 +2765,11 @@
   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
+  ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
   ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
   ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2787,11 +2787,11 @@
   ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
+  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
   ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
   ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2816,8 +2816,8 @@
   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
   ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2840,8 +2840,8 @@
   ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
   ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -2871,8 +2871,8 @@
   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
   ; GFX9:
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2895,8 +2895,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2926,8 +2926,8 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2950,8 +2950,8 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -2981,13 +2981,13 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -3005,13 +3005,13 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -3036,13 +3036,13 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -3060,13 +3060,13 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -3091,13 +3091,13 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -3115,13 +3115,13 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -3191,8 +3191,8 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3217,8 +3217,8 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3253,8 +3253,8 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3284,8 +3284,8 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3326,9 +3326,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3364,9 +3364,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3414,9 +3414,9 @@
 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
@@ -3454,9 +3454,9 @@
 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -14,9 +14,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
@@ -33,9 +33,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
@@ -57,9 +57,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
@@ -85,9 +85,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
@@ -108,9 +108,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
@@ -151,9 +151,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
@@ -202,9 +202,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
@@ -238,9 +238,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource")
 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
@@ -263,9 +263,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -285,9 +285,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -315,9 +315,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -345,9 +345,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -375,9 +375,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
@@ -420,9 +420,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
@@ -478,9 +478,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource")
@@ -516,9 +516,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource")
@@ -585,9 +585,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -611,9 +611,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
@@ -671,9 +671,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
@@ -712,9 +712,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -762,9 +762,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -800,9 +800,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -941,9 +941,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
@@ -976,9 +976,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
@@ -1001,9 +1001,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
@@ -1034,9 +1034,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1059,9 +1059,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -1088,9 +1088,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1154,9 +1154,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1176,9 +1176,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1206,9 +1206,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1235,9 +1235,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1265,9 +1265,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1294,9 +1294,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1324,9 +1324,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -1367,9 +1367,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -1424,9 +1424,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1465,9 +1465,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1522,9 +1522,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1563,9 +1563,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1620,9 +1620,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
@@ -1657,9 +1657,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8)
@@ -1689,9 +1689,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -1724,9 +1724,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -1756,9 +1756,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1788,9 +1788,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1820,9 +1820,9 @@
 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
@@ -1852,9 +1852,9 @@
 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource")
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
@@ -13,9 +13,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
@@ -36,9 +36,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource")
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
@@ -61,9 +61,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16)
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
@@ -87,9 +87,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource")
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
@@ -114,9 +114,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
@@ -143,9 +143,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource")
@@ -173,9 +173,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16)
@@ -204,9 +204,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "ImageResource")
@@ -257,9 +257,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource")
 ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
@@ -305,9 +305,9 @@
 ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
 ;
GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) @@ -332,9 +332,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -382,9 +382,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) @@ -410,9 +410,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) @@ -438,9 +438,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -490,9 +490,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -519,9 +519,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), 
[[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -550,9 +550,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -581,9 +581,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") @@ -613,9 +613,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -645,9 +645,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -677,9 +677,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16) @@ -710,9 +710,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") @@ -743,9 +743,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -776,9 +776,9 @@ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -14,11 +14,11 @@ ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -38,11 +38,11 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -66,14 +66,14 @@ ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX6: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32) ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) @@ -94,14 +94,14 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX10NSA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA: G_STORE [[UV4]](s32), [[MV]](p1) :: (store 4 into %ir.out, addrspace 1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -14,10 +14,10 @@ ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") ; GFX6: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) @@ -33,10 +33,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -56,10 +56,10 @@ ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX6: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") @@ -78,10 +78,10 @@ ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll @@ -14,14 +14,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -40,14 +40,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) @@ -72,14 +72,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -101,14 +101,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -136,15 +136,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -170,15 +170,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -210,15 +210,15 @@ ; GFX9: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -244,15 +244,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -284,14 +284,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -313,14 +313,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -348,15 +348,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -382,15 +382,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -422,14 +422,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -453,14 +453,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: 
[[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -490,15 +490,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -522,15 +522,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -560,14 +560,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -589,14 +589,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -624,15 +624,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -658,15 +658,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: 
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
[... ~50 similar hunks (@@ -698,15 +698,15 @@ through @@ -2505,8 +2507,6 @@) elided: each applies the identical mechanical update to another GFX9/GFX10 check block, adding the '<8 x s32>' and '<4 x s32>' G_BUILD_VECTOR check lines directly after the COPYs of their $sgpr operands and deleting the old definitions that trailed the $vgpr COPYs ...]
@@ -2541,16 +2541,16 @@
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) @@ -2578,16 +2578,16 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) @@ -2620,10 +2620,12 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2631,8 +2633,6 @@ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) @@ -2664,10 +2664,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2675,8 +2677,6 @@ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) @@ -2713,17 +2713,17 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2752,17 +2752,17 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2796,10 +2796,12 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2808,8 +2810,6 @@ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -2842,10 +2842,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2854,8 +2856,6 @@ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: 
[[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -2893,14 +2893,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -2922,14 +2922,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -2957,15 +2957,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -2991,15 +2991,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) @@ -3031,15 +3031,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3063,15 +3063,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3101,16 +3101,16 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3137,16 +3137,16 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x 
s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3178,14 +3178,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -3204,14 +3204,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) @@ -3236,14 +3236,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -3265,14 +3265,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -3300,14 +3300,14 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -3331,14 +3331,14 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -3368,15 +3368,15 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3400,15 +3400,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3438,10 +3438,12 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -3451,8 +3453,6 @@ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3482,10 +3482,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -3495,8 +3497,6 @@ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), 
[[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3531,10 +3531,12 @@ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -3544,8 +3546,6 @@ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -3577,10 +3577,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -3590,8 +3592,6 @@ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
@@ -13,15 +13,15 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) @@ -51,18 +51,18 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32) @@ -93,10 +93,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -106,8 +108,6 @@ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) @@ -143,16 +143,16 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -183,10 +183,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -194,8 +196,6 @@ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) @@ -227,16 +227,16 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) @@ -266,10 +266,12 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -277,8 +279,6 @@ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) @@ -309,17 +309,17 @@ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
@@ -350,10 +350,12 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -362,8 +364,6 @@
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
@@ -395,15 +395,15 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
@@ -433,18 +433,18 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
@@ -475,16 +475,16 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
@@ -515,10 +515,12 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -526,8 +528,6 @@
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
@@ -559,16 +559,16 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
@@ -598,10 +598,12 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -609,8 +611,6 @@
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
@@ -641,17 +641,17 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
@@ -682,10 +682,12 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -694,8 +696,6 @@
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
@@ -727,10 +727,12 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -740,8 +742,6 @@
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
@@ -770,10 +770,12 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -783,8 +785,6 @@
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
@@ -16,11 +16,11 @@
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource")
; UNPACKED: S_ENDPGM 0
@@ -35,11 +35,11 @@
; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX81: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX81: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource")
; GFX81: S_ENDPGM 0
@@ -54,11 +54,11 @@
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource")
; GFX9: S_ENDPGM 0
@@ -73,11 +73,11 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource")
; GFX10: S_ENDPGM 0
@@ -97,10 +97,10 @@
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -121,10 +121,10 @@
; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX81: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
@@ -142,10 +142,10 @@
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "ImageResource")
; GFX9: S_ENDPGM 0
@@ -160,10 +160,10 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "ImageResource")
; GFX10: S_ENDPGM 0
@@ -183,11 +183,11 @@
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
@@ -213,11 +213,11 @@
; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; GFX81: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
@@ -257,11 +257,11 @@
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
@@ -291,11 +291,11 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
@@ -330,11 +330,11 @@
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -359,11 +359,11 @@
; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX81: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>)
@@ -383,11 +383,11 @@
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "ImageResource")
@@ -403,11 +403,11 @@
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "ImageResource")
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
@@ -11,9 +11,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -31,9 +31,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -48,14 +48,14 @@
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1
@@ -73,14 +73,14 @@
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -98,9 +98,9 @@
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -147,9 +147,9 @@
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -192,9 +192,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -214,9 +214,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
@@ -12,9 +12,9 @@
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
@@ -36,9 +36,9 @@
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
@@ -59,9 +59,9 @@
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
@@ -112,9 +112,9 @@
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
@@ -161,9 +161,9 @@
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -11,9 +11,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -29,9 +29,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
@@ -48,8 +48,8 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
@@ -66,8 +66,8 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
@@ -85,9 +85,9 @@
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -132,8 +132,8 @@
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -175,9 +175,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4095
@@ -195,9 +195,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
@@ -213,9 +213,9 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -231,8 +231,8 @@
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
@@ -11,9 +11,9 @@
; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -24,9 +24,9 @@
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -42,9 +42,9 @@
; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -55,9 +55,9 @@
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
@@ -90,9 +90,9 @@
; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
@@ -106,9 +106,9 @@
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
@@ -147,9 +147,9 @@
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -187,9 +187,9 @@
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -231,9 +231,9 @@
; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
@@ -247,9 +247,9 @@
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
@@ -10,9 +10,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -28,9 +28,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
@@ -49,9 +49,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
@@ -72,9 +72,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
@@ -99,9 +99,9 @@
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -143,9 +143,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -11,9 +11,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -30,9 +30,9 @@
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom
"BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -51,9 +51,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -93,9 +93,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -137,9 +137,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -156,9 +156,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -175,9 +175,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -194,9 +194,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -213,9 +213,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -232,9 +232,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG 
implicit $vgpr0 @@ -251,9 +251,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 @@ -272,9 +272,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 @@ -295,9 +295,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 @@ -320,9 +320,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: 
[[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -338,9 +338,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -362,9 +362,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 @@ -383,9 +383,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -403,9 +403,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec ; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]] @@ -426,9 +426,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -468,9 +468,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -510,8 +510,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -527,8 +527,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY 
[[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -544,8 +544,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4) @@ -563,9 +563,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -582,9 +582,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -601,9 +601,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec @@ -623,8 +623,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; 
CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -641,8 +641,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -659,9 +659,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) @@ -680,9 +680,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) @@ -701,9 +701,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) @@ -724,9 +724,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 @@ -769,9 +769,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %13:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -10,10 +10,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit 
$exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 @@ -23,10 +23,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -41,9 +41,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 @@ -53,9 +53,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -70,10 +70,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY 
$sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec @@ -87,10 +87,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -105,11 +105,11 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec @@ -125,12 +125,12 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x 
i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -147,11 +147,11 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec @@ -190,12 +190,12 @@ ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -232,9 +232,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -249,9 +249,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; PACKED: 
BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 @@ -267,9 +267,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -284,9 +284,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 @@ -302,10 +302,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec @@ -319,10 +319,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 @@ -338,10 +338,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec @@ -355,10 +355,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 @@ -374,10 +374,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: %11:vgpr_32, dead %24:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -394,10 +394,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -419,11 +419,11 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: %13:vgpr_32, dead %49:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec @@ -465,12 +465,12 @@ ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -10,10 +10,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -28,9 +28,9 @@ ; CHECK: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -45,12 +45,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -65,13 +65,13 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -86,14 +86,14 @@ ; CHECK: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -109,14 +109,14 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -153,11 +153,11 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -173,11 +173,11 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -193,12 +193,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 @@ -214,12 +214,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], 
[[COPY7]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 @@ -235,12 +235,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec @@ -262,14 +262,14 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -11,10 +11,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: 
BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -30,10 +30,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) @@ -52,10 +52,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -94,10 +94,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -126,10 +126,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -169,10 +169,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) @@ -187,10 +187,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) @@ -205,10 +205,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) @@ -223,10 +223,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) @@ -241,10 +241,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) @@ -259,10 +259,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) @@ -277,10 +277,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) @@ -295,12 +295,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -315,13 +315,13 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -336,14 +336,14 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: 
BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -358,10 +358,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 @@ -377,10 +377,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 @@ -396,10 +396,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -414,10 +414,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -432,12 +432,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -453,12 +453,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -495,9 +495,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -512,9 +512,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) @@ -531,10 +531,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 @@ -550,10 +550,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 @@ -569,10 +569,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; 
CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -591,9 +591,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -609,9 +609,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -627,10 +627,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 @@ -646,10 +646,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], 
[[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 @@ -665,10 +665,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -689,10 +689,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -735,9 +735,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -10,9 +10,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -23,9 +23,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -41,9 +41,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 @@ -65,9 +65,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -89,9 +89,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 @@ -123,9 +123,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 @@ -145,9 +145,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 @@ -185,9 +185,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 @@ -229,9 +229,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; 
UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -242,9 +242,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -260,9 +260,9 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -273,9 +273,9 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], 
[[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -291,9 +291,9 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -304,9 +304,9 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -322,9 +322,9 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -335,9 +335,9 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
@@ -9,9 +9,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -27,9 +27,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
@@ -48,9 +48,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
@@ -71,9 +71,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
@@ -97,9 +97,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -141,9 +141,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -159,9 +159,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -177,9 +177,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -195,9 +195,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
@@ -11,9 +11,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@@ -24,9 +24,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -42,9 +42,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec
@@ -59,9 +59,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -84,9 +84,9 @@
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec
@@ -100,14 +100,14 @@
 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -125,9 +125,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -161,9 +161,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -203,9 +203,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -242,9 +242,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -287,9 +287,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; UNPACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -327,9 +327,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -371,9 +371,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
@@ -384,9 +384,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@@ -402,9 +402,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
@@ -415,9 +415,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@@ -433,9 +433,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
@@ -446,9 +446,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@@ -464,9 +464,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
@@ -477,9 +477,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
@@ -11,9 +11,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
 ; UNPACKED: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@@ -24,9 +24,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
 ; PACKED: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -44,9 +44,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -80,9 +80,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -122,9 +122,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -161,9 +161,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -206,9 +206,9 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; UNPACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -246,9 +246,9 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
@@ -11,9 +11,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -27,14 +27,14 @@
 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -49,14 +49,14 @@
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -72,14 +72,14 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -96,9 +96,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -117,9 +117,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; CHECK: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
@@ -159,9 +159,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; CHECK: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
@@ -204,9 +204,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -249,9 +249,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@@ -268,9 +268,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@@ -287,9 +287,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@@ -306,9 +306,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
@@ -326,8 +326,8 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0)
@@ -343,8 +343,8 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0)
@@ -360,8 +360,8 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
@@ -379,9 +379,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset = add i32 %voffset.base, 16
@@ -398,9 +398,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset = add i32 %voffset.base, 4095
@@ -417,9 +417,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
@@ -439,8 +439,8 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -457,8 +457,8 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -475,9 +475,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
@@ -496,9 +496,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
@@ -517,9 +517,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
 ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
@@ -540,9 +540,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
 ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -585,9 +585,9 @@
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -14,8 +14,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
@@ -28,8 +28,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
@@ -42,8 +42,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
@@ -61,8 +61,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
@@ -75,8 +75,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
@@ -89,8 +89,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
@@ -108,8 +108,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
@@ -127,8 +127,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
@@ -146,8 +146,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
@@ -170,8 +170,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
 ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
@@ -197,8 +197,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
 ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
@@ -224,8 +224,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
 ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
@@ -256,8 +256,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
@@ -299,8 +299,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
@@ -342,8 +342,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
@@ -390,8 +390,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
@@ -465,8 +465,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 @@ -540,8 +540,8 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4) ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 @@ -1488,8 +1488,8 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -1501,8 +1501,8 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -1514,8 +1514,8 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -1532,8 +1532,8 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = 
BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 @@ -1548,8 +1548,8 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 @@ -1564,8 +1564,8 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 @@ -1585,8 +1585,8 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF @@ -1608,8 +1608,8 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF @@ -1631,8 +1631,8 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF @@ -1659,8 +1659,8 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -1679,8 +1679,8 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -1699,8 +1699,8 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -1724,8 +1724,8 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 
@@ -1754,8 +1754,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -1784,8 +1784,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -1819,8 +1819,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -1867,8 +1867,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -1915,8 +1915,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -1968,8 +1968,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -1981,8 +1981,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -1994,8 +1994,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2013,8 +2013,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2026,8 +2026,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2039,8 +2039,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2058,8 +2058,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2071,8 +2071,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2084,8 +2084,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
@@ -2104,8 +2104,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2134,8 +2134,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2164,8 +2164,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2201,8 +2201,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2231,8 +2231,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2261,8 +2261,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2297,8 +2297,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2345,8 +2345,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2393,8 +2393,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2447,8 +2447,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2495,8 +2495,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2543,8 +2543,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
@@ -2598,8 +2598,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -2633,8 +2633,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -2668,8 +2668,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -2709,8 +2709,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -2742,8 +2742,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -2775,8 +2775,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -2815,8 +2815,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -2852,8 +2852,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -2889,8 +2889,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3148,8 +3148,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -3198,8 +3198,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -3248,8 +3248,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -3306,8 +3306,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3360,8 +3360,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3414,8 +3414,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3474,8 +3474,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3528,8 +3528,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3582,8 +3582,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
@@ -3642,8 +3642,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -3693,8 +3693,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -3744,8 +3744,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -3801,8 +3801,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -3852,8 +3852,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -3903,8 +3903,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -3960,8 +3960,8 @@
 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -4011,8 +4011,8 @@
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -4062,8 +4062,8 @@
 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
@@ -4274,9 +4274,9 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -4287,9 +4287,9 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -4300,9 +4300,9 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -4319,9 +4319,9 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -4332,9 +4332,9 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -4345,9 +4345,9 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -4364,9 +4364,9 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@@ -4380,9 +4380,9 @@
 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@@ -4396,9 +4396,9 @@
 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@@ -4419,9 +4419,9 @@
 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ;
GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -4435,9 +4435,9 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -4451,9 +4451,9 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -4475,9 +4475,9 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) @@ -4490,9 +4490,9 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = 
COPY $sgpr6 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) @@ -4505,9 +4505,9 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) @@ -4527,9 +4527,9 @@ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec @@ -4543,9 +4543,9 @@ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec @@ -4559,9 +4559,9 @@ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -11,10 +11,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] @@ -33,10 +33,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] @@ -52,15 +52,15 @@ ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 @@ -79,15 +79,15 @@ ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -106,10 +106,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] @@ -158,10 +158,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, 
[[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] @@ -207,10 +207,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -13,10 +13,10 @@ ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) @@ -39,10 +39,10 @@ ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) @@ -64,10 +64,10 @@ ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] @@ -120,10 +120,10 @@ ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] @@ -172,10 +172,10 @@ ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -11,10 +11,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -31,10 +31,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -52,9 +52,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) @@ -71,9 +71,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, 
i32 %vindex, i32 0, i32 %soffset, i32 0) @@ -91,10 +91,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] @@ -141,9 +141,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 @@ -187,10 +187,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -207,9 +207,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) @@ -225,10 +225,10 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY 
$sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 ; CHECK: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -245,9 +245,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -10,10 +10,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] @@ -25,10 +25,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] @@ -45,10 +45,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 @@ -71,10 +71,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] @@ -97,10 +97,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 @@ -133,10 +133,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 @@ -158,10 +158,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; UNPACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 @@ -222,10 +222,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 @@ -272,10 +272,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] @@ -287,10 +287,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] @@ -308,10 +308,10 @@ ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] @@ -323,10 +323,10 @@ ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: 
[[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -10,10 +10,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] @@ -30,10 +30,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 @@ -53,10 +53,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 @@ -78,10 +78,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 @@ -107,10 +107,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 @@ -161,10 +161,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] @@ -182,10 +182,10 @@ ; CHECK: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -11,10 +11,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] @@ -32,10 +32,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 @@ -56,10 +56,10 @@ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
$sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0
@@ -82,10 +82,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0
@@ -110,9 +110,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
@@ -132,10 +132,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
@@ -153,9 +153,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
@@ -175,10 +175,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -222,10 +222,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]]
@@ -244,10 +244,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
 ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec
@@ -267,10 +267,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
@@ -289,10 +289,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec
@@ -313,10 +313,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
@@ -334,10 +334,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
@@ -361,10 +361,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
@@ -385,10 +385,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
@@ -11,10 +11,10 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
@@ -26,10 +26,10 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
@@ -46,10 +46,10 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec
@@ -65,10 +65,10 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
@@ -92,10 +92,10 @@
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; UNPACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY]], implicit $exec
@@ -110,15 +110,15 @@
 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; PACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
 ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
@@ -136,10 +136,10 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
@@ -180,10 +180,10 @@
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 ; PACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; PACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
@@ -228,10 +228,10 @@
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: S_ENDPGM 0
@@ -243,10 +243,10 @@
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
@@ -10,10 +10,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -27,15 +27,15 @@
 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
 ; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -50,15 +50,15 @@
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
 ; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -74,15 +74,15 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
 ; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -100,10 +100,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
@@ -148,10 +148,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
@@ -12,10 +12,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -29,15 +29,15 @@
 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -52,15 +52,15 @@
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -76,15 +76,15 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
 ; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -101,15 +101,15 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7
 ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
 ; CHECK: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
 ; CHECK: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
 ; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
@@ -154,10 +154,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -175,10 +175,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -196,10 +196,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -216,10 +216,10 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
@@ -239,15 +239,15 @@
 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
 ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
 define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
 ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@@ -11,10 +11,10 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
@@ -26,10 +26,10 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
@@ -46,10 +46,10 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]]
@@ -61,10 +61,10 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
@@ -98,10 +98,10 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
@@ -116,10 +116,10 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
@@ -157,9 +157,9 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
@@ -173,9 +173,9 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
@@ -195,10 +195,10 @@
 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -241,10 +241,10 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; UNPACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -309,10 +309,10 @@
 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
@@ -324,10 +324,10 @@
 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
@@ -10,10 +10,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
@@ -30,10 +30,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
@@ -53,10 +53,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
@@ -78,10 +78,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
@@ -105,9 +105,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
@@ -127,10 +127,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
 ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
@@ -181,10 +181,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll @@ -485,12 +485,12 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, 0xffff -; GFX6-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_lshr_v2i16: @@ -514,11 +514,10 @@ ; GFX6-LABEL: v_lshr_v2i16_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s4, 0xffff -; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 15, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 15, v1 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 15, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 31, v0 +; GFX6-NEXT: v_mov_b32_e32 v0, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_lshr_v2i16_15: @@ -543,12 +542,12 @@ ; GFX6-LABEL: s_lshr_v2i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0xffff -; GFX6-NEXT: s_and_b32 s2, s2, s4 -; GFX6-NEXT: s_and_b32 s0, s0, s4 -; GFX6-NEXT: s_lshr_b32 s0, s0, s2 -; GFX6-NEXT: s_and_b32 s2, s3, s4 +; GFX6-NEXT: s_lshr_b32 s2, s0, 16 +; GFX6-NEXT: s_lshr_b32 s3, s1, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s4 -; GFX6-NEXT: s_lshr_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s0, s0, s4 +; GFX6-NEXT: s_lshr_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s1, s2, s3 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -586,13 +585,13 @@ define amdgpu_ps float @lshr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) { ; GFX6-LABEL: lshr_v2i16_sv: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshr_b32 s1, s0, 16 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX6-NEXT: s_mov_b32 s2, 0xffff +; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0 ; GFX6-NEXT: s_and_b32 s0, s0, s2 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 -; GFX6-NEXT: v_and_b32_e32 v1, s2, v1 -; GFX6-NEXT: s_and_b32 s0, s1, s2 -; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: ; return to shader part epilog @@ -618,13 +617,13 @@ define amdgpu_ps float 
@lshr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) { ; GFX6-LABEL: lshr_v2i16_vs: ; GFX6: ; %bb.0: +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: s_lshr_b32 s1, s0, 16 ; GFX6-NEXT: s_mov_b32 s2, 0xffff +; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1 ; GFX6-NEXT: s_and_b32 s0, s0, s2 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 -; GFX6-NEXT: s_and_b32 s0, s1, s2 -; GFX6-NEXT: v_and_b32_e32 v1, s2, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: ; return to shader part epilog Index: llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -285,15 +285,8 @@ define amdgpu_ps i32 @s_orn2_v2i16(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { ; GFX6-LABEL: s_orn2_v2i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b32 s1, 0xffff -; GFX6-NEXT: s_and_b32 s2, s2, s1 -; GFX6-NEXT: s_lshl_b32 s0, s3, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s2 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_and_b32 s1, s4, s1 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_xor_b32 s1, s1, -1 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_xor_b32 s0, s3, -1 +; GFX6-NEXT: s_or_b32 s0, s2, s0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_v2i16: @@ -309,15 +302,8 @@ define amdgpu_ps i32 @s_orn2_v2i16_commute(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { ; GFX6-LABEL: s_orn2_v2i16_commute: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b32 s1, 0xffff -; GFX6-NEXT: s_and_b32 s2, s2, s1 -; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_xor_b32 s0, s3, -1 ; GFX6-NEXT: s_or_b32 s0, s0, s2 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_and_b32 s1, s4, s1 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_xor_b32 s1, s1, -1 -; GFX6-NEXT: s_or_b32 s0, s1, s0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_v2i16_commute: @@ -333,15 +319,8 @@ define amdgpu_ps { i32, i32 } @s_orn2_v2i16_multi_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { ; GFX6-LABEL: s_orn2_v2i16_multi_use: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b32 s1, 0xffff -; GFX6-NEXT: s_and_b32 s2, s2, s1 -; GFX6-NEXT: s_lshl_b32 s0, s3, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s2 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_and_b32 s1, s4, s1 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_xor_b32 s1, s1, -1 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_xor_b32 s1, s3, -1 +; GFX6-NEXT: s_or_b32 s0, s2, s1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_v2i16_multi_use: @@ -362,19 +341,9 @@ define amdgpu_ps { i32, i32 } @s_orn2_v2i16_multi_foldable_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1, <2 x i16> inreg %src2) { ; GFX6-LABEL: s_orn2_v2i16_multi_foldable_use: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b32 s1, 0xffff -; GFX6-NEXT: s_lshl_b32 s0, s3, 16 -; GFX6-NEXT: s_and_b32 s2, s2, s1 -; GFX6-NEXT: s_or_b32 s0, s0, s2 -; GFX6-NEXT: s_and_b32 s3, s4, s1 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_or_b32 s2, s2, s3 -; GFX6-NEXT: s_lshl_b32 s3, s7, 16 -; GFX6-NEXT: s_and_b32 s1, s6, s1 +; GFX6-NEXT: s_xor_b32 s1, s4, -1 +; GFX6-NEXT: s_or_b32 s0, s2, s1 ; GFX6-NEXT: s_or_b32 s1, s3, s1 -; GFX6-NEXT: s_xor_b32 s1, s1, -1 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_or_b32 s1, s2, s1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_v2i16_multi_foldable_use: @@ -397,13 +366,6 @@ ; GFX6-LABEL: 
v_orn2_v2i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, v0, v4 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -15,8 +15,8 @@ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; FAST: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) @@ -33,8 +33,8 @@ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) @@ -58,8 +58,8 @@ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") @@ -77,8 +77,8 @@ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") @@ -104,8 +104,8 @@ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -158,8 +158,8 @@ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -219,8 +219,8 @@ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF @@ -274,8 +274,8 @@ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -15,13 +15,13 @@ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr9 + ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; FAST: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 ; FAST: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 ; FAST: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) @@ -37,13 +37,13 @@ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; GREEDY: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 ; GREEDY: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 ; GREEDY: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) @@ -66,13 +66,13 @@ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; FAST: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 ; FAST: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 ; FAST: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 - ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; FAST: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") @@ -89,13 +89,13 @@ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; GREEDY: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 ; GREEDY: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 ; GREEDY: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") @@ -120,13 +120,13 @@ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; FAST: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; FAST: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; FAST: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -178,13 +178,13 @@ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GREEDY: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -243,13 +243,13 @@ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -291,13 +291,13 @@ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -346,13 +346,13 @@ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 ; FAST: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 ; FAST: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 - ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -416,13 +416,13 @@ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -11,9 +11,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) @@ -32,9 +32,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr6 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) @@ -55,9 +55,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF @@ -102,9 +102,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF @@ -140,9 +140,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -11,8 +11,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -25,8 +25,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -44,8 +44,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) @@ -62,8 +62,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) @@ -85,8 +85,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) @@ -111,8 +111,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: 
(dereferenceable invariant load 12, align 4) ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) @@ -142,8 +142,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) @@ -178,8 +178,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) @@ -219,8 +219,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) @@ -279,8 +279,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) @@ -345,8 +345,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) @@ -359,8 +359,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) @@ -378,8 +378,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) @@ -394,8 +394,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) @@ -415,8 +415,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -439,8 +439,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -468,8 +468,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -486,8 +486,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -509,8 +509,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) 
= G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -533,8 +533,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -562,8 +562,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -596,8 +596,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) @@ -635,8 +635,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -651,8 +651,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -674,8 +674,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: 
[[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -695,8 +695,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -723,8 +723,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -752,8 +752,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -788,8 +788,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -809,8 +809,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -837,8 +837,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x 
s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -866,8 +866,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -902,8 +902,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -923,8 +923,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -951,8 +951,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -980,8 +980,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -1016,8 +1016,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; 
CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -1037,8 +1037,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -1065,8 +1065,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -1094,8 +1094,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 @@ -1129,8 +1129,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] @@ -1146,8 +1146,8 @@ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] @@ -1169,8 +1169,8 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = 
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1186,8 +1186,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1209,8 +1209,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1225,8 +1225,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1248,8 +1248,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1275,8 +1275,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1309,8 +1309,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1335,8 +1335,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1367,8 +1367,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1404,8 +1404,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1447,8 +1447,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1483,8 +1483,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -1526,8 +1526,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -1565,8 +1565,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -1610,8 +1610,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092
 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -1650,8 +1650,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092
 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -1697,8 +1697,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -1738,8 +1738,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -1955,8 +1955,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -2003,8 +2003,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064
 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -2059,8 +2059,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068
 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -2108,8 +2108,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068
 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -2163,8 +2163,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -2212,8 +2212,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096
 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -2267,8 +2267,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -2315,8 +2315,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -2369,8 +2369,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -2417,8 +2417,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -2471,8 +2471,8 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -2519,8 +2519,8 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080
 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]]
@@ -2672,9 +2672,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
 ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -2688,9 +2688,9 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
 ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -2710,9 +2710,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]]
 ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -2726,9 +2726,9 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]]
 ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
@@ -2748,9 +2748,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
@@ -2768,9 +2768,9 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
@@ -2795,9 +2795,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]]
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
@@ -2815,9 +2815,9 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]]
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
@@ -2843,9 +2843,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]]
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -2861,9 +2861,9 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]]
 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
@@ -2886,9 +2886,9 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
@@ -2905,9 +2905,9 @@
 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
@@ -11,10 +11,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -31,10 +31,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
 ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
@@ -54,10 +54,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
@@ -100,10 +100,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -137,10 +137,10 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
@@ -11,11 +11,11 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -31,11 +31,11 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
 ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
 ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
 ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
 ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32)
@@ -55,11 +55,11 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -99,11 +99,11 @@
 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
 ; CHECK: bb.2:
@@ -134,11 +134,11 @@
 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
@@ -176,22 +176,24 @@
 ; GFX6-LABEL: v_roundeven_v2f16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-NEXT: v_rndne_f32_e32 v0, v0
-; GFX6-NEXT: v_rndne_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX6-NEXT: v_rndne_f32_e32 v0, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_rndne_f32_e32 v1, v2
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_roundeven_v2f16:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_rndne_f32_e32 v0, v0
-; GFX7-NEXT: v_rndne_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX7-NEXT: v_rndne_f32_e32 v0, v1
 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_rndne_f32_e32 v1, v2
 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -221,9 +223,6 @@
 ; GFX6-LABEL: v_roundeven_v2f16_fneg:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -237,9 +236,6 @@
 ; GFX7-LABEL: v_roundeven_v2f16_fneg:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -2702,19 +2702,21 @@
 ; GFX6-LABEL: v_saddsat_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_brev_b32 s5, 1
 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5
 ; GFX6-NEXT: s_brev_b32 s4, -2
 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0
 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
-; GFX6-NEXT: v_max_i32_e32 v2, v5, v2
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
+; GFX6-NEXT: v_max_i32_e32 v1, v5, v1
+; GFX6-NEXT: v_min_i32_e32 v1, v1, v4
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT: v_min_i32_e32 v4, 0, v1
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1
 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4
@@ -2768,18 +2770,20 @@
 define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: s_saddsat_v2i16:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: s_brev_b32 s5, 1
 ; GFX6-NEXT: s_min_i32 s7, s0, 0
-; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_lshr_b32 s3, s1, 16
+; GFX6-NEXT: s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT: s_sub_i32 s7, s5, s7
 ; GFX6-NEXT: s_brev_b32 s4, -2
 ; GFX6-NEXT: s_max_i32 s6, s0, 0
 ; GFX6-NEXT: s_sub_i32 s6, s4, s6
-; GFX6-NEXT: s_max_i32 s2, s7, s2
-; GFX6-NEXT: s_min_i32 s2, s2, s6
-; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_add_i32 s0, s0, s2
+; GFX6-NEXT: s_max_i32 s1, s7, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s6
+; GFX6-NEXT: s_add_i32 s0, s0, s1
+; GFX6-NEXT: s_lshl_b32 s1, s2, 16
 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16
 ; GFX6-NEXT: s_max_i32 s3, s1, 0
 ; GFX6-NEXT: s_sub_i32 s3, s4, s3
@@ -2854,9 +2858,11 @@
 define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-LABEL: saddsat_v2i16_sv:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: s_brev_b32 s3, 1
 ; GFX6-NEXT: s_min_i32 s5, s0, 0
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_sub_i32 s5, s3, s5
 ; GFX6-NEXT: s_brev_b32 s2, -2
@@ -2927,9 +2933,11 @@
 define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: saddsat_v2i16_vs:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_brev_b32 s3, 1
 ; GFX6-NEXT: v_min_i32_e32 v3, 0, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s3, v3
 ; GFX6-NEXT: s_brev_b32 s2, -2
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -454,13 +454,9 @@
 define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
 ; GFX7-LABEL: s_shl_v2i32_zext_v2i16:
 ; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, 0xffff
-; GFX7-NEXT: s_lshl_b32 s1, s1, 16
-; GFX7-NEXT: s_and_b32 s0, s0, s2
-; GFX7-NEXT: s_or_b32 s0, s1, s0
 ; GFX7-NEXT: s_and_b32 s0, s0, 0x3fff3fff
 ; GFX7-NEXT: s_lshr_b32 s1, s0, 16
-; GFX7-NEXT: s_and_b32 s0, s0, s2
+; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
 ; GFX7-NEXT: s_lshl_b32 s0, s0, 2
 ; GFX7-NEXT: s_lshl_b32 s1, s1, 2
 ; GFX7-NEXT: ; return to shader part epilog
@@ -502,13 +498,9 @@
 ; GFX7-LABEL: v_shl_v2i32_zext_v2i16:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX7-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX7-NEXT: v_and_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -485,11 +485,11 @@
 ; GFX6-LABEL: v_shl_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
-; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v2
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_shl_v2i16:
@@ -513,6 +513,7 @@
 ; GFX6-LABEL: v_shl_v2i16_15:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 15, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -539,10 +540,11 @@
 ; GFX6-LABEL: s_shl_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: s_and_b32 s2, s2, s4
-; GFX6-NEXT: s_lshl_b32 s0, s0, s2
-; GFX6-NEXT: s_and_b32 s2, s3, s4
-; GFX6-NEXT: s_lshl_b32 s1, s1, s2
+; GFX6-NEXT: s_lshr_b32 s3, s1, 16
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
+; GFX6-NEXT: s_and_b32 s1, s1, s4
+; GFX6-NEXT: s_lshl_b32 s0, s0, s1
+; GFX6-NEXT: s_lshl_b32 s1, s2, s3
 ; GFX6-NEXT: s_and_b32 s1, s1, s4
 ; GFX6-NEXT: s_and_b32 s0, s0, s4
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
@@ -579,8 +581,9 @@
 define amdgpu_ps float @shl_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
 ; GFX6-LABEL: shl_v2i16_sv:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
-; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
 ; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
@@ -611,11 +614,12 @@
 define amdgpu_ps float @shl_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
 ; GFX6-LABEL: shl_v2i16_vs:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, s1, v1
 ; GFX6-NEXT: s_and_b32 s0, s0, s2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
-; GFX6-NEXT: s_and_b32 s0, s1, s2
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -2688,18 +2688,20 @@
 ; GFX6-LABEL: v_ssubsat_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_brev_b32 s4, -2
 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4
 ; GFX6-NEXT: s_brev_b32 s5, 1
 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
-; GFX6-NEXT: v_min_i32_e32 v2, v2, v5
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_max_i32_e32 v1, v4, v1
+; GFX6-NEXT: v_min_i32_e32 v1, v1, v5
+; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
@@ -2754,18 +2756,20 @@
 define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: s_ssubsat_v2i16:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: s_brev_b32 s4, -2
 ; GFX6-NEXT: s_max_i32 s6, s0, -1
-; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_lshr_b32 s3, s1, 16
+; GFX6-NEXT: s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT: s_sub_i32 s6, s6, s4
 ; GFX6-NEXT: s_brev_b32 s5, 1
 ; GFX6-NEXT: s_min_i32 s7, s0, -1
 ; GFX6-NEXT: s_sub_i32 s7, s7, s5
-; GFX6-NEXT: s_max_i32 s2, s6, s2
-; GFX6-NEXT: s_min_i32 s2, s2, s7
-; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_sub_i32 s0, s0, s2
+; GFX6-NEXT: s_max_i32 s1, s6, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s7
+; GFX6-NEXT: s_sub_i32 s0, s0, s1
+; GFX6-NEXT: s_lshl_b32 s1, s2, 16
 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16
 ; GFX6-NEXT: s_max_i32 s3, s1, -1
 ; GFX6-NEXT: s_sub_i32 s3, s3, s4
@@ -2840,9 +2844,11 @@
 define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-LABEL: ssubsat_v2i16_sv:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: s_brev_b32 s2, -2
 ; GFX6-NEXT: s_max_i32 s4, s0, -1
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_sub_i32 s4, s4, s2
 ; GFX6-NEXT: s_brev_b32 s3, 1
@@ -2913,9 +2919,11 @@
 define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: ssubsat_v2i16_vs:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_brev_b32 s2, -2
 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2
 ; GFX6-NEXT: s_brev_b32 s3, 1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -1748,12 +1748,14 @@
 ; GFX6-LABEL: v_uaddsat_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v0
-; GFX6-NEXT: v_min_u32_e32 v2, v4, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_xor_b32_e32 v4, -1, v0
+; GFX6-NEXT: v_min_u32_e32 v1, v4, v1
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v1
 ; GFX6-NEXT: v_min_u32_e32 v2, v3, v2
@@ -1791,12 +1793,14 @@
 define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: s_uaddsat_v2i16:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_not_b32 s4, s0
-; GFX6-NEXT: s_min_u32 s2, s4, s2
+; GFX6-NEXT: s_lshr_b32 s3, s1, 16
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_add_i32 s0, s0, s2
+; GFX6-NEXT: s_not_b32 s4, s0
+; GFX6-NEXT: s_min_u32 s1, s4, s1
+; GFX6-NEXT: s_add_i32 s0, s0, s1
+; GFX6-NEXT: s_lshl_b32 s1, s2, 16
 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16
 ; GFX6-NEXT: s_not_b32 s3, s1
 ; GFX6-NEXT: s_min_u32 s2, s3, s2
@@ -1841,7 +1845,9 @@
 define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-LABEL: uaddsat_v2i16_sv:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_not_b32 s2, s0
 ; GFX6-NEXT: v_min_u32_e32 v0, s2, v0
@@ -1885,7 +1891,9 @@
 define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: uaddsat_v2i16_vs:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
 ; GFX6-NEXT: v_min_u32_e32 v2, s0, v2
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -1662,11 +1662,13 @@
 ; GFX6-LABEL: v_usubsat_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT: v_min_u32_e32 v2, v0, v2
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_min_u32_e32 v1, v0, v1
+; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT: v_min_u32_e32 v2, v1, v2
 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -1703,11 +1705,13 @@
 define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: s_usubsat_v2i16:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
+; GFX6-NEXT: s_lshr_b32 s3, s1, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_min_u32 s2, s0, s2
-; GFX6-NEXT: s_sub_i32 s0, s0, s2
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_min_u32 s1, s0, s1
+; GFX6-NEXT: s_sub_i32 s0, s0, s1
+; GFX6-NEXT: s_lshl_b32 s1, s2, 16
 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16
 ; GFX6-NEXT: s_min_u32 s2, s1, s2
 ; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -1751,6 +1755,8 @@
 define amdgpu_ps float @usubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-LABEL: usubsat_v2i16_sv:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: v_min_u32_e32 v0, s0, v0
@@ -1793,6 +1799,8 @@
 define amdgpu_ps float @usubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: usubsat_v2i16_vs:
 ; GFX6: ; %bb.0:
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT: v_min_u32_e32 v2, s0, v0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -19,13 +19,6 @@
 define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
 ; GFX7-LABEL: scalar_xnor_v2i16_one_use:
 ; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_mov_b32 s4, 0xffff
-; GFX7-NEXT: s_lshl_b32 s1, s1, 16
-; GFX7-NEXT: s_and_b32 s0, s0, s4
-; GFX7-NEXT: s_or_b32 s0, s1, s0
-; GFX7-NEXT: s_lshl_b32 s1, s3, 16
-; GFX7-NEXT: s_and_b32 s2, s2, s4
-; GFX7-NEXT: s_or_b32 s1, s1, s2
 ; GFX7-NEXT: s_xor_b32 s0, s0, s1
 ; GFX7-NEXT: s_xor_b32 s0, s0, -1
 ; GFX7-NEXT: ; return to shader part epilog