diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -923,6 +923,9 @@ return getGeneration() >= GFX9; } + /// \returns true if the machine supports saveexec_b32 instructions. + bool hasSaveexecB32() const { return getGeneration() >= GFX10; } + /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -22,6 +22,7 @@ class GCNSubtarget; class LiveIntervals; class RegisterBank; +struct SGPRSpillBuilder; class SIMachineFunctionInfo; class SIRegisterInfo final : public AMDGPUGenRegisterInfo { @@ -106,21 +107,15 @@ const TargetRegisterClass *getPointerRegClass( const MachineFunction &MF, unsigned Kind = 0) const override; - void buildWaveVGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index, - Register VGPR, RegScavenger *RS, bool IsLoad, - bool VGPRLive = false, - Register FreeSGPR = 0) const; + bool buildWaveVGPRSpillLoadStore(SGPRSpillBuilder &SB, bool IsLoad, + bool SGPRIsFree = false) const; - void buildVGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index, - int Offset, unsigned EltSize, Register VGPR, - RegScavenger *RS, bool IsLoad, - bool UseKillFromMI = true, + void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, + bool IsLoad, bool UseKillFromMI = true, bool IsKill = true) const; - void buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index, - int Offset, unsigned EltSize, Register VGPR, - int64_t VGPRLanes, RegScavenger *RS, - bool IsLoad) const; + void buildSGPRSpillLoadStore(SGPRSpillBuilder &SB, int Offset, + int64_t VGPRLanes) const; /// If \p OnlyToVGPR is true, this will only succeed if this bool spillSGPR(MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -43,6 +43,133 @@ static const std::array SubRegFromChannelTableWidthMap = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9}; +namespace llvm { + +// A temporary struct to spill SGPRs. +struct SGPRSpillBuilder { + struct PerVGPRData { + unsigned PerVGPR; + unsigned NumVGPRs; + int64_t VGPRLanes; + }; + + // The SGPR to save + Register SuperReg; + MachineBasicBlock::iterator MI; + ArrayRef SplitParts; + unsigned NumSubRegs; + bool IsKill; + const DebugLoc &DL; + + /* When spilling to stack */ + // The SGPRs are written into this VGPR, which is then written to scratch + // (or vice versa for loads). + Register TmpVGPR = AMDGPU::NoRegister; + // Temporary spill slot to save TmpVGPR to. + int TmpVGPRIndex = 0; + // If TmpVGPR is live before the spill according to LLVM liveness info. + bool TmpVGPRLive = false; + // EXEC is currently saved to SavedExecReg. + bool SavedExecActive = false; + // EXEC can be saved to this subregister of SuperReg. + Register SavedExecReg = AMDGPU::NoRegister; + unsigned SavedExecSubregStart; + // If we only have one SGPR to spare, we only save EXEC_LO. + bool SavedExecLoOnly; + // Stack index to write the SGPRs to. + int Index; + // True for restores, false for saves. + bool IsLoad; + unsigned EltSize = 4; + RegScavenger *RS; + + MachineBasicBlock *MBB; + MachineFunction *MF; + SIMachineFunctionInfo *MFI; + const SIInstrInfo *TII; + + SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo *TII, + MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, + bool IsLoad) + : SuperReg(MI->getOperand(0).getReg()), MI(MI), + IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index), + IsLoad(IsLoad), RS(RS), MBB(MI->getParent()), MF(MBB->getParent()), + MFI(MF->getInfo()), TII(TII) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); + SplitParts = TRI.getRegSplitParts(RC, EltSize); + NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + + assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); + assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && + SuperReg != AMDGPU::EXEC && "exec should never spill"); + } + + PerVGPRData getPerVGPRData(bool IsWave32) { + PerVGPRData Data; + Data.PerVGPR = IsWave32 ? 32 : 64; + Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR; + Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL; + return Data; + } + + void setTmpVgpr(Register R, bool Live) { + TmpVGPR = R; + RS->setRegUsed(TmpVGPR); + + // Reserve temporary stack slot + if (!MFI->SpillSGPRTmpIndex.hasValue()) { + MachineFrameInfo &FrameInfo = MF->getFrameInfo(); + MFI->SpillSGPRTmpIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); + } + TmpVGPRIndex = *MFI->SpillSGPRTmpIndex; + + TmpVGPRLive = Live; + } + + // Try to fit exec into the SGPRs in SuperReg and set SavedExecReg to the + // resulting register. + void setSavedExecReg(const SIRegisterInfo &TRI, bool IsWave32) { + SavedExecReg = AMDGPU::NoRegister; + SavedExecSubregStart = 0; + if (!IsWave32) { + SavedExecLoOnly = false; + // If src/dst is an odd size it is possible subreg0 is not aligned. + for (; SavedExecSubregStart < (NumSubRegs - 1); ++SavedExecSubregStart) { + SavedExecReg = TRI.getMatchingSuperReg( + TRI.getSubReg(SuperReg, SplitParts[SavedExecSubregStart]), + AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass); + if (SavedExecReg) + break; + } + } + if (!SavedExecReg) { + // Wave32 or we failed to find 2 SGPRs + SavedExecLoOnly = true; + SavedExecReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[0])); + } + } + + void restoreExecFromSGPR(bool UseTmpVGPR = false) { + if (SavedExecActive) { + Register ExecReg = SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + unsigned ExecMovOpc = + SavedExecLoOnly ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + auto I = BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg) + .addReg(SavedExecReg, RegState::Kill); + if (UseTmpVGPR) + I.addReg(TmpVGPR, RegState::Implicit | RegState::Kill); + SavedExecActive = false; + } else { + assert(!UseTmpVGPR && + "Cannot add implicit use when EXEC is already restored"); + } + } +}; + +} // namespace llvm + SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { @@ -1022,134 +1149,170 @@ /// Save or restore all lanes of a VGPR to a stack slot without using any SGPRs. /// -/// We need to save all lanes if we overwrite some lanes when storing an SGPR +/// We need to save the lanes that we overwrite when storing an SGPR /// into a VGPR with v_writelane. If we currently try to spill an SGPR, we do /// not have a free SGPR to save EXEC to, so we save all currently active lanes, /// then flip EXEC (EXEC = EXEC ^ -1), then save the rest of the lanes and flip /// EXEC again to restore its original value. -void SIRegisterInfo::buildWaveVGPRSpillLoadStore(MachineBasicBlock::iterator MI, - int Index, Register VGPR, - RegScavenger *RS, bool IsLoad, - bool VGPRLive, - Register FreeSGPR) const { - unsigned EltSize = 4; - MachineBasicBlock *MBB = MI->getParent(); - const DebugLoc &DL = MI->getDebugLoc(); - const SIInstrInfo *TII = ST.getInstrInfo(); - - // If we have free SGPRs, use that to save EXEC. - Register SavedExecReg = AMDGPU::NoRegister; - if (FreeSGPR) { - const TargetRegisterClass *RC = getPhysRegClass(FreeSGPR); - - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - - if (isWave32) { - SavedExecReg = NumSubRegs == 1 - ? FreeSGPR - : Register(getSubReg(FreeSGPR, SplitParts[0])); - } else { - // If src/dst is an odd size it is possible subreg0 is not aligned. - for (unsigned ExecLane = 0; ExecLane < (NumSubRegs - 1); ++ExecLane) { - SavedExecReg = - getMatchingSuperReg(getSubReg(FreeSGPR, SplitParts[ExecLane]), - AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass); - if (SavedExecReg) - break; - } - } - } - - if (!IsLoad && !VGPRLive) { +/// +/// This saves at least the lower 32 lanes, or all lanes if EXEC fits into the +/// spilled SGPRs. +/// +/// Returns if a use of SB.TmpVGPR needs to be added. +bool SIRegisterInfo::buildWaveVGPRSpillLoadStore(SGPRSpillBuilder &SB, + bool IsLoad, + bool SGPRIsFree) const { + if (!IsLoad && !SB.TmpVGPRLive) { // FIXME LLVM may not know that VGPR is live in other lanes, we need to mark // it as live, otherwise the MachineIR verifier complains. // Only add this if VGPR is currently not live. - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY_INACTIVE_LANES), VGPR); + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(AMDGPU::COPY_INACTIVE_LANES), + SB.TmpVGPR); } + // If TmpVGPR is not live in the currently active lanes, we do not need to + // spill them. However, we need to do the same for spill and restore. + // FIXME LLVM may not know that VGPR is used in other lanes, we need to mark // it as used, otherwise it can be removed. // Only add this if VGPR was not live before. - bool NeedsUse = IsLoad && !VGPRLive; - - if (SavedExecReg) { - // Use SGPRs to save exec - Register ExecReg = isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - unsigned ExecMovOpc = isWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - unsigned ExecOrOpc = - isWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; - // Save exec and activate all lanes - BuildMI(*MBB, MI, DL, TII->get(ExecOrOpc), SavedExecReg).addImm(-1); + bool NeedsUse = IsLoad && !SB.TmpVGPRLive; + + if (SGPRIsFree) { + // If we have free SGPRs, use that to save EXEC. + if (!SB.SavedExecActive) { + SB.SavedExecActive = true; + // Save EXEC + if (!SB.TmpVGPRLive) { + // Only spill inactive lanes + if (!SB.SavedExecLoOnly || ST.hasSaveexecB32()) { + // _saveexec_b32 instructions are only available on gfx10+ + unsigned ExecXorOpc = SB.SavedExecLoOnly ? AMDGPU::S_XOR_SAVEEXEC_B32 + : AMDGPU::S_XOR_SAVEEXEC_B64; + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecXorOpc), + SB.SavedExecReg) + .addImm(-1); + } else { + unsigned ExecMovOpc = + SB.SavedExecLoOnly ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + Register ExecReg = + SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + unsigned ExecXorOpc = + SB.SavedExecLoOnly ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64; + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecMovOpc), + SB.SavedExecReg) + .addReg(ExecReg); + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecXorOpc), ExecReg) + .addReg(ExecReg) + .addImm(-1); + } + } else { + if (!SB.SavedExecLoOnly || ST.hasSaveexecB32()) { + // Use only one instruction instead of two moves but may need more + // memory bandwidth, so only do it on gfx10+ where cache lines are + // larger. + unsigned ExecOrOpc = SB.SavedExecLoOnly ? AMDGPU::S_OR_SAVEEXEC_B32 + : AMDGPU::S_OR_SAVEEXEC_B64; + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecOrOpc), + SB.SavedExecReg) + .addImm(-1); + } else { + // Use move instructions + unsigned ExecMovOpc = + SB.SavedExecLoOnly ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + Register ExecReg = + SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecMovOpc), + SB.SavedExecReg) + .addReg(ExecReg); + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecMovOpc), ExecReg) + .addImm(-1); + } + } + } else { + // Already saved, set active lanes + if (!SB.TmpVGPRLive) { + // Only spill inactive lanes + unsigned ExecNotOpc = + SB.SavedExecLoOnly ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; + Register ExecReg = SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecNotOpc), ExecReg) + .addReg(SB.SavedExecReg); + } else { + unsigned ExecMovOpc = + SB.SavedExecLoOnly ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + Register ExecReg = SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecMovOpc), ExecReg) + .addImm(-1); + } + } // Save/restore VGPR - buildVGPRSpillLoadStore(MI, Index, 0, EltSize, VGPR, RS, IsLoad); - - // Restore exec - // FIXME This often creates unnecessary exec moves - auto LastI = BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg) - .addReg(SavedExecReg); - if (NeedsUse) - LastI.addReg(VGPR, RegState::Implicit); + buildVGPRSpillLoadStore(SB, SB.TmpVGPRIndex, 0, IsLoad); } else { - // We cannot set exec to -1, because we do not have a free SGPR, so save the - // currently active lanes, flip exec, save the rest of the lanes and flip - // exec again. - Register ExecReg = isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - unsigned ExecNotOpc = isWave32 ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; + assert(!SB.SavedExecActive && + "We have no free SGPR to save EXEC, so it cannot be saved"); + // We cannot set EXEC to -1 because we do not have a free SGPR, so save the + // currently active lanes, flip EXEC, save the rest of the lanes and flip + // EXEC again. + Register ExecReg = SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + unsigned ExecNotOpc = + SB.SavedExecLoOnly ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; MachineInstrBuilder LastI; for (unsigned I = 0; I < 2; I++) { - // Mark the second store as kill - buildVGPRSpillLoadStore(MI, Index, 0, EltSize, VGPR, RS, IsLoad, false, - !IsLoad && I == 1); - // Flip exec - LastI = - BuildMI(*MBB, MI, DL, TII->get(ExecNotOpc), ExecReg).addReg(ExecReg); + // Spill active lanes only if necessary + if (SB.TmpVGPRLive || I != 0) { + // Mark the second store as kill + buildVGPRSpillLoadStore(SB, SB.TmpVGPRIndex, 0, IsLoad, false, + !IsLoad && I == 1); + } + + // Flip EXEC + LastI = BuildMI(*SB.MBB, SB.MI, SB.DL, SB.TII->get(ExecNotOpc), ExecReg) + .addReg(ExecReg); + } + if (NeedsUse) { + LastI.addReg(SB.TmpVGPR, RegState::Implicit | RegState::Kill); + NeedsUse = false; } - if (NeedsUse) - LastI.addReg(VGPR, RegState::Implicit); } + return NeedsUse; } -void SIRegisterInfo::buildVGPRSpillLoadStore(MachineBasicBlock::iterator MI, - int Index, int Offset, - unsigned EltSize, Register VGPR, - RegScavenger *RS, bool IsLoad, +void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, + int Offset, bool IsLoad, bool UseKillFromMI, bool IsKill) const { - MachineBasicBlock *MBB = MI->getParent(); - MachineFunction *MF = MBB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); - if (UseKillFromMI) - IsKill = MI->getOperand(0).isKill(); + IsKill = SB.MI->getOperand(0).isKill(); // Load/store VGPR - MachineFrameInfo &FrameInfo = MF->getFrameInfo(); + MachineFrameInfo &FrameInfo = SB.MF->getFrameInfo(); assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill); - Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF) - ? getBaseRegister() - : getFrameRegister(*MF); + Register FrameReg = + FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*SB.MF) + ? getBaseRegister() + : getFrameRegister(*SB.MF); Align Alignment = FrameInfo.getObjectAlign(Index); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, Index); - MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*SB.MF, Index); + MachineMemOperand *MMO = SB.MF->getMachineMemOperand( PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore, - EltSize, Alignment); + SB.EltSize, Alignment); if (IsLoad) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; - buildSpillLoadStore(MI, Opc, Index, VGPR, false, FrameReg, Offset * EltSize, - MMO, RS); + buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, + Offset * SB.EltSize, MMO, SB.RS); } else { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; - buildSpillLoadStore(MI, Opc, Index, VGPR, IsKill, FrameReg, - Offset * EltSize, MMO, RS); + buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg, + Offset * SB.EltSize, MMO, SB.RS); // This only ever adds one VGPR spill - MFI->addToSpilledVGPRs(1); + SB.MFI->addToSpilledVGPRs(1); } } @@ -1157,82 +1320,65 @@ // spill such that all the lanes set in VGPRLanes are loaded or stored. // This generates exec mask manipulation and will use SGPRs available in MI // or VGPR lanes in the VGPR to save and restore the exec mask. -void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, - int Index, int Offset, - unsigned EltSize, Register VGPR, - int64_t VGPRLanes, - RegScavenger *RS, - bool IsLoad) const { - MachineBasicBlock *MBB = MI->getParent(); +void SIRegisterInfo::buildSGPRSpillLoadStore(SGPRSpillBuilder &SB, int Offset, + int64_t VGPRLanes) const { const SIInstrInfo *TII = ST.getInstrInfo(); - Register SuperReg = MI->getOperand(0).getReg(); - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); unsigned FirstPart = Offset * 32; - unsigned ExecLane = 0; - - bool IsKill = MI->getOperand(0).isKill(); - const DebugLoc &DL = MI->getDebugLoc(); - - // Cannot handle load/store to EXEC - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - // On Wave32 only handle EXEC_LO. - // On Wave64 only update EXEC_HI if there is sufficent space for a copy. - bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI; - - unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - Register SavedExecReg; - - // Backup EXEC - if (OnlyExecLo) { - SavedExecReg = - NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[FirstPart + ExecLane])); - } else { - // If src/dst is an odd size it is possible subreg0 is not aligned. - for (; ExecLane < (NumSubRegs - 1); ++ExecLane) { - SavedExecReg = getMatchingSuperReg( - getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0, - &AMDGPU::SReg_64_XEXECRegClass); - if (SavedExecReg) - break; + // Save exec to currently free SGPR + if (!SB.SavedExecActive) { + SB.SavedExecActive = true; + + // Save EXEC + if (SB.SavedExecLoOnly && ST.hasSaveexecB32()) { + // Use only one instruction instead of two moves but may need more memory + // bandwidth, so only do it on gfx10+ where cache lines are larger. + unsigned ExecOrOpc = SB.SavedExecLoOnly ? AMDGPU::S_OR_SAVEEXEC_B32 + : AMDGPU::S_OR_SAVEEXEC_B64; + BuildMI(*SB.MBB, SB.MI, SB.DL, TII->get(ExecOrOpc), SB.SavedExecReg) + .addImm(VGPRLanes); + } else { + // Use move instructions + unsigned ExecMovOpc = + SB.SavedExecLoOnly ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + Register ExecReg = SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(*SB.MBB, SB.MI, SB.DL, TII->get(ExecMovOpc), SB.SavedExecReg) + .addReg(ExecReg); + BuildMI(*SB.MBB, SB.MI, SB.DL, TII->get(ExecMovOpc), ExecReg) + .addImm(VGPRLanes); } + } else { + // Already saved, set active lanes + unsigned ExecMovOpc = + SB.SavedExecLoOnly ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + Register ExecReg = SB.SavedExecLoOnly ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(*SB.MBB, SB.MI, SB.DL, TII->get(ExecMovOpc), ExecReg) + .addImm(VGPRLanes); } - assert(SavedExecReg); - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg); - // Setup EXEC - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg).addImm(VGPRLanes); + buildVGPRSpillLoadStore(SB, SB.Index, Offset, SB.IsLoad); - buildVGPRSpillLoadStore(MI, Index, Offset, EltSize, VGPR, RS, IsLoad); + bool CanClobberSGPRs = SB.IsLoad || SB.IsKill; + if (!CanClobberSGPRs) { + SB.restoreExecFromSGPR(); - // Restore EXEC - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg) - .addReg(SavedExecReg, getKillRegState(IsLoad || IsKill)); - - // Restore clobbered SGPRs - if (IsLoad) { - // Nothing to do; register will be overwritten - } else if (!IsKill) { // Restore SGPRs from appropriate VGPR lanes - if (!OnlyExecLo) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), - getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1])) - .addReg(VGPR) - .addImm(ExecLane + 1); + if (!SB.SavedExecLoOnly) { + BuildMI(*SB.MBB, SB.MI, SB.DL, TII->get(AMDGPU::V_READLANE_B32), + getSubReg(SB.SuperReg, + SB.SplitParts[FirstPart + SB.SavedExecSubregStart + 1])) + .addReg(SB.TmpVGPR) + .addImm(SB.SavedExecSubregStart + 1); } - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), - NumSubRegs == 1 ? SavedExecReg - : Register(getSubReg( - SuperReg, SplitParts[FirstPart + ExecLane]))) - .addReg(VGPR, RegState::Kill) - .addImm(ExecLane); + BuildMI(*SB.MBB, SB.MI, SB.DL, TII->get(AMDGPU::V_READLANE_B32), + SB.SavedExecLoOnly + ? SB.SavedExecReg + : Register(getSubReg( + SB.SuperReg, + SB.SplitParts[FirstPart + SB.SavedExecSubregStart]))) + .addReg(SB.TmpVGPR, RegState::Kill) + .addImm(SB.SavedExecSubregStart); } } @@ -1240,60 +1386,43 @@ int Index, RegScavenger *RS, bool OnlyToVGPR) const { - MachineBasicBlock *MBB = MI->getParent(); - MachineFunction *MF = MBB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); + SGPRSpillBuilder SB(*this, ST.getInstrInfo(), MI, Index, RS, false); - ArrayRef VGPRSpills - = MFI->getSGPRToVGPRSpills(Index); + ArrayRef VGPRSpills = + SB.MFI->getSGPRToVGPRSpills(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; - const SIInstrInfo *TII = ST.getInstrInfo(); - - Register SuperReg = MI->getOperand(0).getReg(); - bool IsKill = MI->getOperand(0).isKill(); - const DebugLoc &DL = MI->getDebugLoc(); - - assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() && - SuperReg != MFI->getFrameOffsetReg())); - - assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - unsigned EltSize = 4; - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + assert(SpillToVGPR || (SB.SuperReg != SB.MFI->getStackPtrOffsetReg() && + SB.SuperReg != SB.MFI->getFrameOffsetReg())); if (SpillToVGPR) { - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - bool UseKill = IsKill && i == NumSubRegs - 1; + bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1; // Mark the "old value of vgpr" input undef only if this is the first sgpr // spill to this specific vgpr in the first basic block. - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) - .addReg(SubReg, getKillRegState(UseKill)) - .addImm(Spill.Lane) - .addReg(Spill.VGPR); + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, + SB.TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) + .addReg(SubReg, getKillRegState(UseKill)) + .addImm(Spill.Lane) + .addReg(Spill.VGPR); - if (i == 0 && NumSubRegs > 1) { + if (i == 0 && SB.NumSubRegs > 1) { // We may be spilling a super-register which is only partially defined, // and need to ensure later spills think the value is defined. - MIB.addReg(SuperReg, RegState::ImplicitDefine); + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } - if (NumSubRegs > 1) - MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit); + if (SB.NumSubRegs > 1) + MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit); // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to @@ -1305,76 +1434,92 @@ // FIXME: The liveness analysis is limited and does not tell if a register // is in use in lanes that are currently inactive. We can never be sure if // a register as actually in use in another lane, so we need to save all - // lanes of the chosen VGPR. Pick v0 because it doesn't make a difference. - Register TmpVGPR = AMDGPU::VGPR0; - RS->setRegUsed(TmpVGPR); - - // Reserve temporary stack slot - if (!MFI->SpillSGPRTmpIndex.hasValue()) { - MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - MFI->SpillSGPRTmpIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); + // lanes of the chosen VGPR. + Register TmpVGPR = + RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false); + if (TmpVGPR) { + // Found a register that is dead in the currently active lanes, we only + // need to spill inactive lanes. + SB.setTmpVgpr(TmpVGPR, false); + } else { + // Pick v0 because it doesn't make a difference. + SB.setTmpVgpr(AMDGPU::VGPR0, true); } - unsigned TmpVGPRIndex = *MFI->SpillSGPRTmpIndex; - - // Check if TmpVGPR is currently live according to LLVM liveness info - RegScavenger TmpRS; - TmpRS.enterBasicBlock(*MBB); - TmpRS.forward(MI); - bool TmpVGPRLive = TmpRS.isRegUsed(TmpVGPR); + SB.setSavedExecReg(*this, isWave32); + + // The resulting code looks like this in the worst case: + // ; Save VGPR + // buffer_store_dword v0 + // s_not exec, exec + // buffer_store_dword v0 + // s_not exec, exec + // v_writelane v0, s1, 0 + // v_writelane v0, s2, 1 + // ; Save exec_lo only as, s[1:2] is not aligned + // s_mov s1, exec_lo + // ; Set exec mask to spill selected lanes + // s_mov exec_lo, 3 + // buffer_store_dword v0 offset:4 + // s_mov exec_lo, s1 + // ; Restore SGPRs if they are not killed by the store + // v_readlane s1, v0, 0 + // ; Restore VGPR + // buffer_load_dword v0 + // s_not exec, exec + // buffer_load_dword v0 + // s_not exec, exec // Save TmpVGPR - buildWaveVGPRSpillLoadStore(MI, TmpVGPRIndex, TmpVGPR, RS, false, - TmpVGPRLive); + buildWaveVGPRSpillLoadStore(SB, false); - // SubReg carries the "Kill" flag when SubReg == SuperReg. - unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); + // SubReg carries the "Kill" flag when SubReg == SB.SuperReg. + unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); - unsigned PerVGPR = 32; - unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR; - int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL; + // Per VGPR helper data + auto PVD = SB.getPerVGPRData(isWave32); - for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) { + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { unsigned TmpVGPRFlags = RegState::Undef; // Write sub registers into the VGPR - for (unsigned i = Offset * PerVGPR, - e = std::min((Offset + 1) * PerVGPR, NumSubRegs); + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); MachineInstrBuilder WriteLane = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), TmpVGPR) + BuildMI(*SB.MBB, MI, SB.DL, SB.TII->get(AMDGPU::V_WRITELANE_B32), + SB.TmpVGPR) .addReg(SubReg, SubKillState) - .addImm(i % PerVGPR) - .addReg(TmpVGPR, TmpVGPRFlags); + .addImm(i % PVD.PerVGPR) + .addReg(SB.TmpVGPR, TmpVGPRFlags); TmpVGPRFlags = 0; // There could be undef components of a spilled super register. // TODO: Can we detect this and skip the spill? - if (NumSubRegs > 1) { - // The last implicit use of the SuperReg carries the "Kill" flag. + if (SB.NumSubRegs > 1) { + // The last implicit use of the SB.SuperReg carries the "Kill" flag. unsigned SuperKillState = 0; - if (i + 1 == NumSubRegs) - SuperKillState |= getKillRegState(IsKill); - WriteLane.addReg(SuperReg, RegState::Implicit | SuperKillState); + if (i + 1 == SB.NumSubRegs) + SuperKillState |= getKillRegState(SB.IsKill); + WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); } } // Write out VGPR - buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes, - RS, false); + buildSGPRSpillLoadStore(SB, Offset, PVD.VGPRLanes); } // Restore temporary VGPR - buildWaveVGPRSpillLoadStore(MI, TmpVGPRIndex, TmpVGPR, RS, true, - TmpVGPRLive, - IsKill ? SuperReg : AMDGPU::NoRegister); + bool NeedsUse = buildWaveVGPRSpillLoadStore(SB, true, SB.IsKill); + SB.restoreExecFromSGPR(NeedsUse); } MI->eraseFromParent(); - MFI->addToSpilledSGPRs(NumSubRegs); + SB.MFI->addToSpilledSGPRs(SB.NumSubRegs); return true; } @@ -1382,44 +1527,28 @@ int Index, RegScavenger *RS, bool OnlyToVGPR) const { - MachineFunction *MF = MI->getParent()->getParent(); - MachineBasicBlock *MBB = MI->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); + SGPRSpillBuilder SB(*this, ST.getInstrInfo(), MI, Index, RS, true); - ArrayRef VGPRSpills - = MFI->getSGPRToVGPRSpills(Index); + ArrayRef VGPRSpills = + SB.MFI->getSGPRToVGPRSpills(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; - const SIInstrInfo *TII = ST.getInstrInfo(); - const DebugLoc &DL = MI->getDebugLoc(); - - Register SuperReg = MI->getOperand(0).getReg(); - - assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - unsigned EltSize = 4; - - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - if (SpillToVGPR) { - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, + SB.TII->get(AMDGPU::V_READLANE_B32), SubReg) .addReg(Spill.VGPR) .addImm(Spill.Lane); - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } } else { // Scavenged temporary VGPR to use. It must be scavenged once for any number @@ -1427,56 +1556,52 @@ // FIXME: The liveness analysis is limited and does not tell if a register // is in use in lanes that are currently inactive. We can never be sure if // a register as actually in use in another lane, so we need to save all - // lanes of the chosen VGPR. Pick v0 because it doesn't make a difference. - Register TmpVGPR = AMDGPU::VGPR0; - RS->setRegUsed(TmpVGPR); - - if (!MFI->SpillSGPRTmpIndex.hasValue()) { - MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - MFI->SpillSGPRTmpIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); + // lanes of the chosen VGPR. + Register TmpVGPR = + RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false); + if (TmpVGPR) { + // Found a register that is dead in the currently active lanes, we only + // need to spill inactive lanes. + SB.setTmpVgpr(TmpVGPR, false); + } else { + // Pick v0 because it doesn't make a difference. + SB.setTmpVgpr(AMDGPU::VGPR0, true); } - unsigned TmpVGPRIndex = MFI->SpillSGPRTmpIndex.getValue(); - - // Check if TmpVGPR is currently live according to LLVM liveness info - RegScavenger TmpRS; - TmpRS.enterBasicBlock(*MBB); - TmpRS.forward(MI); - bool TmpVGPRLive = TmpRS.isRegUsed(TmpVGPR); + SB.setSavedExecReg(*this, isWave32); // Save temporary VGPR - buildWaveVGPRSpillLoadStore(MI, TmpVGPRIndex, TmpVGPR, RS, false, - TmpVGPRLive, SuperReg); + buildWaveVGPRSpillLoadStore(SB, false, true); - unsigned PerVGPR = 32; - unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR; - int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL; + // Per VGPR helper data + auto PVD = SB.getPerVGPRData(isWave32); - for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) { + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { // Load in VGPR data - buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes, - RS, true); + buildSGPRSpillLoadStore(SB, Offset, PVD.VGPRLanes); + SB.restoreExecFromSGPR(); // Unpack lanes - for (unsigned i = Offset * PerVGPR, - e = std::min((Offset + 1) * PerVGPR, NumSubRegs); + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); bool LastSubReg = (i + 1 == e); - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(TmpVGPR, getKillRegState(LastSubReg)) - .addImm(i); - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, + SB.TII->get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) + .addImm(i); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } } // Restore TmpVGPR - buildWaveVGPRSpillLoadStore(MI, TmpVGPRIndex, TmpVGPR, RS, true, - TmpVGPRLive); + bool NeedsUse = buildWaveVGPRSpillLoadStore(SB, true); + SB.restoreExecFromSGPR(NeedsUse); } MI->eraseFromParent(); diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -15,8 +15,8 @@ ; SPILL-TO-VGPR: v_readlane_b32 s4, v40, 0 ; SPILL-TO-VGPR: v_readlane_b32 s5, v40, 1 -; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v0, 0 -; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v0, 1 +; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v1, 0 +; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v1, 1 ; SPILL-TO-VGPR: v_readlane_b32 s33, v40, 2 ; NO-SPILL-TO-VGPR: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -762,7 +762,6 @@ ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#COPY_INACTIVE_LANES v0 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_not_b64 exec, exec ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_not_b64 exec, exec @@ -771,8 +770,7 @@ ; GCN-NEXT: s_mov_b64 s[2:3], exec ; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[2:3] -; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GCN-NEXT: s_not_b64 exec, s[2:3] ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 exec, s[2:3] @@ -852,17 +850,14 @@ ; GCN-NEXT: v_readlane_b32 s18, v31, 62 ; GCN-NEXT: v_readlane_b32 s19, v31, 63 ; GCN-NEXT: ;;#COPY_INACTIVE_LANES v0 -; GCN-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GCN-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[0:1] -; GCN-NEXT: s_mov_b64 s[0:1], exec ; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s0, v0, 0 ; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_not_b64 exec, exec ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -1009,7 +1004,6 @@ ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#COPY_INACTIVE_LANES v0 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_not_b64 exec, exec ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_not_b64 exec, exec @@ -1018,8 +1012,7 @@ ; GCN-NEXT: s_mov_b64 s[2:3], exec ; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[2:3] -; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1 +; GCN-NEXT: s_not_b64 exec, s[2:3] ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 exec, s[2:3] @@ -1101,19 +1094,18 @@ ; GCN-NEXT: v_readlane_b32 s17, v31, 61 ; GCN-NEXT: v_readlane_b32 s18, v31, 62 ; GCN-NEXT: v_readlane_b32 s19, v31, 63 -; GCN-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[0:1] -; GCN-NEXT: s_mov_b64 s[0:1], exec +; GCN-NEXT: ;;#COPY_INACTIVE_LANES v1 +; GCN-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload +; GCN-NEXT: v_readlane_b32 s0, v1, 0 +; GCN-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-NEXT: s_not_b64 exec, exec -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_not_b64 exec, exec ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] @@ -1124,7 +1116,6 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use v0 ; GCN-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -1,186 +1,10 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,MUBUF %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN32,MUBUF %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,FLATSCR %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN64-MUBUF %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN32-MUBUF %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN64-FLATSCR %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -filetype=obj -verify-machineinstrs -start-before=prologepilog %s -o /dev/null # Check not crashing when emitting ISA - -# CHECK-LABEL: name: check_spill - -# FLATSCR: $sgpr33 = S_MOV_B32 0 -# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc -# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc - -# S32 with kill -# CHECK: V_WRITELANE -# CHECK: $sgpr12 = S_MOV_B32 $exec_lo -# CHECK: $exec_lo = S_MOV_B32 1 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 4 -# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12 - -# S32 without kill -# CHECK: V_WRITELANE -# CHECK: $sgpr12 = S_MOV_B32 $exec_lo -# CHECK: $exec_lo = S_MOV_B32 1 -# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 4 -# CHECK: $sgpr12 = V_READLANE - -# S64 with kill -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 3 -# GCN64: $exec = S_MOV_B64 3 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 8 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S64 without kill -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 3 -# GCN64: $exec = S_MOV_B64 3 -# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 8 -# GCN32: $exec_lo = S_MOV_B32 $sgpr12 -# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13 -# GCN64: $sgpr13 = V_READLANE -# CHECK: $sgpr12 = V_READLANE - -# S96 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 7 -# GCN64: $exec = S_MOV_B64 7 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 16 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S128 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 15 -# GCN64: $exec = S_MOV_B64 15 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 28 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S160 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 31 -# GCN64: $exec = S_MOV_B64 31 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 44 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 44 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S256 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 255 -# GCN64: $exec = S_MOV_B64 255 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 64 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 64 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S512 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 65535 -# GCN64: $exec = S_MOV_B64 65535 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 96 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 96 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S1024 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr64 = S_MOV_B32 $exec_lo -# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 4294967295 -# GCN64: $exec = S_MOV_B64 4294967295 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 160 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 -# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65 - --- | define amdgpu_kernel void @check_spill() #0 { @@ -230,6 +54,974 @@ bb.0: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 + ; GCN32-LABEL: name: check_spill + ; GCN32: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 + ; GCN32: $sgpr33 = S_MOV_B32 0 + ; GCN32: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 15, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 31, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 255, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 65535, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $sgpr64 = S_OR_SAVEEXEC_B32 4294967295, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 + ; FLATSCR-LABEL: name: check_spill + ; FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 + ; FLATSCR: $sgpr33 = S_MOV_B32 0 + ; FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc + ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc + ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo + ; FLATSCR: $exec_lo = S_MOV_B32 1 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: $exec_lo = S_MOV_B32 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 + ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 + ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo + ; FLATSCR: $exec_lo = S_MOV_B32 1 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 + ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 3 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 3 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 7 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 15 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 31 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 255 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 65535 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 4294967295 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65 + ; GCN64-MUBUF-LABEL: name: check_spill + ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 + ; GCN64-MUBUF: $sgpr33 = S_MOV_B32 0 + ; GCN64-MUBUF: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr30 = S_MOV_B32 4294967295, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-MUBUF: $sgpr12 = S_MOV_B32 $exec_lo + ; GCN64-MUBUF: $exec_lo = S_MOV_B32 1 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; GCN64-MUBUF: $sgpr12 = S_MOV_B32 $exec_lo + ; GCN64-MUBUF: $exec_lo = S_MOV_B32 1 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN64-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 3 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 3 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 7 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 15 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 31 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 255 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 65535 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN64-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $sgpr64_sgpr65 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 4294967295 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN64-MUBUF: $exec = S_NOT_B64 $sgpr64_sgpr65, implicit-def $scc + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr64_sgpr65, implicit killed $vgpr0 + ; GCN32-MUBUF-LABEL: name: check_spill + ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 + ; GCN32-MUBUF: $sgpr33 = S_MOV_B32 0 + ; GCN32-MUBUF: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 15, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 31, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 255, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $sgpr12 = S_OR_SAVEEXEC_B32 65535, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN32-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN32-MUBUF: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $sgpr64 = S_OR_SAVEEXEC_B32 4294967295, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_NOT_B32 $sgpr64, implicit-def $scc + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr64, implicit killed $vgpr0 + ; GCN64-FLATSCR-LABEL: name: check_spill + ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 + ; GCN64-FLATSCR: $sgpr33 = S_MOV_B32 0 + ; GCN64-FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc + ; GCN64-FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc + ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo + ; GCN64-FLATSCR: $exec_lo = S_MOV_B32 1 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $sgpr12, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; GCN64-FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo + ; GCN64-FLATSCR: $exec_lo = S_MOV_B32 1 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN64-FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec_lo = S_NOT_B32 $exec_lo, implicit-def $scc, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 3 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 3 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 7 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 15 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 31 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 255 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 65535 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr12_sgpr13, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13, implicit killed $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN64-FLATSCR: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 4294967295 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5) + ; GCN64-FLATSCR: $exec = S_NOT_B64 $sgpr64_sgpr65, implicit-def $scc + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65, implicit killed $vgpr0 renamable $sgpr12 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 @@ -260,161 +1052,6 @@ renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 - -# CHECK-LABEL: name: check_reload - -# FLATSCR: $sgpr33 = S_MOV_B32 0 -# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc -# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc - -# S32 -# CHECK: $sgpr12 = S_MOV_B32 $exec_lo -# CHECK: $exec_lo = S_MOV_B32 1 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4 -# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12 -# CHECK: $sgpr12 = V_READLANE - -# S64 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 3 -# GCN64: $exec = S_MOV_B64 3 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE - -# S96 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 7 -# GCN64: $exec = S_MOV_B64 7 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 16 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE - -# S128 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 15 -# GCN64: $exec = S_MOV_B64 15 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 28 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE - -# S160 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 31 -# GCN64: $exec = S_MOV_B64 31 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 44 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE -# CHECK: $sgpr16 = V_READLANE - -# S256 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 255 -# GCN64: $exec = S_MOV_B64 255 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 64 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE -# CHECK: $sgpr16 = V_READLANE -# CHECK: $sgpr17 = V_READLANE -# CHECK: $sgpr18 = V_READLANE -# CHECK: $sgpr19 = V_READLANE - -# S512 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 65535 -# GCN64: $exec = S_MOV_B64 65535 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 96 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE -# CHECK: $sgpr16 = V_READLANE -# CHECK: $sgpr17 = V_READLANE -# CHECK: $sgpr18 = V_READLANE -# CHECK: $sgpr19 = V_READLANE -# CHECK: $sgpr20 = V_READLANE -# CHECK: $sgpr21 = V_READLANE -# CHECK: $sgpr22 = V_READLANE -# CHECK: $sgpr23 = V_READLANE -# CHECK: $sgpr24 = V_READLANE -# CHECK: $sgpr25 = V_READLANE -# CHECK: $sgpr26 = V_READLANE -# CHECK: $sgpr27 = V_READLANE - -# S1024 -# GCN32: $sgpr64 = S_MOV_B32 $exec_lo -# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 4294967295 -# GCN64: $exec = S_MOV_B64 4294967295 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 -# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65 -# CHECK: $sgpr64 = V_READLANE -# CHECK: $sgpr65 = V_READLANE -# CHECK: $sgpr66 = V_READLANE -# CHECK: $sgpr67 = V_READLANE -# CHECK: $sgpr68 = V_READLANE -# CHECK: $sgpr69 = V_READLANE -# CHECK: $sgpr70 = V_READLANE -# CHECK: $sgpr71 = V_READLANE -# CHECK: $sgpr72 = V_READLANE -# CHECK: $sgpr73 = V_READLANE -# CHECK: $sgpr74 = V_READLANE -# CHECK: $sgpr75 = V_READLANE -# CHECK: $sgpr76 = V_READLANE -# CHECK: $sgpr77 = V_READLANE -# CHECK: $sgpr78 = V_READLANE -# CHECK: $sgpr79 = V_READLANE -# CHECK: $sgpr80 = V_READLANE -# CHECK: $sgpr81 = V_READLANE -# CHECK: $sgpr82 = V_READLANE -# CHECK: $sgpr83 = V_READLANE -# CHECK: $sgpr84 = V_READLANE -# CHECK: $sgpr85 = V_READLANE -# CHECK: $sgpr86 = V_READLANE -# CHECK: $sgpr87 = V_READLANE -# CHECK: $sgpr88 = V_READLANE -# CHECK: $sgpr89 = V_READLANE -# CHECK: $sgpr90 = V_READLANE -# CHECK: $sgpr91 = V_READLANE -# CHECK: $sgpr92 = V_READLANE -# CHECK: $sgpr93 = V_READLANE -# CHECK: $sgpr94 = V_READLANE -# CHECK: $sgpr95 = V_READLANE - --- name: check_reload tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll --- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -13,11 +13,11 @@ ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1 ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2 ; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3 -; SGPR-NEXT: buffer_load_dword v0, off, s[{{[0-9]+:[0-9]+}}], 0 -; SGPR-NEXT: s_not_b64 exec, exec -; SGPR-NEXT: buffer_load_dword v0, off, s[96:99], 0 ; 4-byte Folded Reload ; SGPR-NEXT: s_not_b64 exec, exec +; SGPR-NEXT: buffer_load_dword [[VHI]], off, s[96:99], 0 ; 4-byte Folded Reload ; SGPR-NEXT: s_waitcnt vmcnt(0) +; SGPR-NEXT: s_not_b64 exec, exec +; SGPR-NEXT: s_nop 0 ; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0 ; ALL: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -47,7 +47,6 @@ ; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $vcc = IMPLICIT_DEF ; GFX9: $vgpr0 = COPY_INACTIVE_LANES implicit $exec - ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc @@ -56,15 +55,14 @@ ; GFX9: $vcc = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $exec = S_MOV_B64 $vcc + ; GFX9: $exec = S_MOV_B64 killed $vcc ; GFX9: $vcc_hi = V_READLANE_B32 $vgpr0, 1 ; GFX9: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0 - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit $vgpr0 + ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit killed $vgpr0 ; GFX9: $vcc = IMPLICIT_DEF - ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $vgpr0 = COPY_INACTIVE_LANES implicit $exec ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc @@ -73,23 +71,20 @@ ; GFX9: $vcc = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $exec = S_MOV_B64 killed $vcc - ; GFX9: $vcc = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX9: $exec = S_NOT_B64 $vcc, implicit-def $scc ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX9: $exec = S_MOV_B64 $vcc - ; GFX9: $vcc = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX9: $exec = S_MOV_B64 killed $vcc, implicit killed $vgpr0 + ; GFX9: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GFX9: $vcc = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) - ; GFX9: $exec = S_MOV_B64 $vcc - ; GFX9: $vcc = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $vcc ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GFX9: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit killed $vgpr0 ; GFX10-LABEL: name: check_vcc ; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX10: $sgpr33 = S_MOV_B32 0 @@ -101,7 +96,6 @@ ; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10: $vcc = IMPLICIT_DEF ; GFX10: $vgpr0 = COPY_INACTIVE_LANES implicit $exec - ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc @@ -110,15 +104,14 @@ ; GFX10: $vcc = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $exec = S_MOV_B64 $vcc + ; GFX10: $exec = S_MOV_B64 killed $vcc ; GFX10: $vcc_hi = V_READLANE_B32 $vgpr0, 1 ; GFX10: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0 - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit $vgpr0 + ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit killed $vgpr0 ; GFX10: $vcc = IMPLICIT_DEF - ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $vgpr0 = COPY_INACTIVE_LANES implicit $exec ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc @@ -127,23 +120,20 @@ ; GFX10: $vcc = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $exec = S_MOV_B64 killed $vcc - ; GFX10: $vcc = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10: $exec = S_NOT_B64 $vcc, implicit-def $scc ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX10: $exec = S_MOV_B64 $vcc - ; GFX10: $vcc = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10: $exec = S_MOV_B64 killed $vcc, implicit killed $vgpr0 + ; GFX10: $vgpr0 = COPY_INACTIVE_LANES implicit $exec + ; GFX10: $vcc = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) - ; GFX10: $exec = S_MOV_B64 $vcc - ; GFX10: $vcc = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) ; GFX10: $exec = S_MOV_B64 killed $vcc ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc + ; GFX10: $exec = S_NOT_B64 $exec, implicit-def $scc, implicit killed $vgpr0 $vcc = IMPLICIT_DEF SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32