diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1237,16 +1237,8 @@ if (!allStackObjectsAreDead(MFI)) { assert(RS && "RegScavenger required if spilling"); - if (FuncInfo->isEntryFunction()) { - int ScavengeFI = MFI.CreateFixedObject( - TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); - RS->addScavengingFrameIndex(ScavengeFI); - } else { - int ScavengeFI = MFI.CreateStackObject( - TRI->getSpillSize(AMDGPU::SGPR_32RegClass), - TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false); - RS->addScavengingFrameIndex(ScavengeFI); - } + // Add an emergency spill slot + RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); } } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -480,6 +480,10 @@ // VGPRs used for AGPR spills. SmallVector SpillVGPR; + // Emergency stack slot. Sometimes, we create this before finalizing the stack + // frame, so save it here and add it to the RegScavenger later. + Optional ScavengeFI; + public: // FIXME /// If this is set, an SGPR used for save/restore of the register used for the /// frame pointer. @@ -536,6 +540,8 @@ bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR); void removeDeadFrameIndices(MachineFrameInfo &MFI); + int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI); + bool hasCalculatedTID() const { return TIDReg != 0; }; Register getTIDReg() const { return TIDReg; }; void setTIDReg(Register Reg) { TIDReg = Reg; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -440,6 +440,21 @@ } } +int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, + const SIRegisterInfo &TRI) { + if (ScavengeFI) + return *ScavengeFI; + if (isEntryFunction()) { + ScavengeFI = MFI.CreateFixedObject( + TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); + } else { + ScavengeFI = MFI.CreateStackObject( + TRI.getSpillSize(AMDGPU::SGPR_32RegClass), + TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false); + } + return *ScavengeFI; +} + MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -22,6 +22,7 @@ class GCNSubtarget; class LiveIntervals; class RegisterBank; +struct SGPRSpillBuilder; class SIMachineFunctionInfo; class SIRegisterInfo final : public AMDGPUGenRegisterInfo { @@ -106,10 +107,11 @@ const TargetRegisterClass *getPointerRegClass( const MachineFunction &MF, unsigned Kind = 0) const override; - void buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index, - int Offset, unsigned EltSize, Register VGPR, - int64_t VGPRLanes, RegScavenger *RS, - bool IsLoad) const; + void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, + bool IsLoad, bool IsKill = true) const; + + void buildSGPRSpillLoadStore(SGPRSpillBuilder &SB, int Offset, + int64_t VGPRLanes) const; /// If \p OnlyToVGPR is true, this will only succeed if this bool spillSGPR(MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -43,6 +43,231 @@ static const std::array SubRegFromChannelTableWidthMap = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9}; +namespace llvm { + +// A temporary struct to spill SGPRs. +// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits +// just v_writelane and v_readlane. +// +// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR +// is saved to scratch (or the other way around for loads). +// For this, a VGPR is required where the needed lanes can be clobbered. The +// RegScavenger can provide a VGPR where currently active lanes can be +// clobbered, but we still need to save inactive lanes. +// The high-level steps are: +// - Try to scavenge SGPR(s) to save exec +// - Try to scavenge VGPR +// - Save needed, all or inactive lanes of a TmpVGPR +// - Spill/Restore SGPRs using TmpVGPR +// - Restore TmpVGPR +// +// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we +// cannot scavenge temporary SGPRs to save exec, we use the following code: +// buffer_store_dword TmpVGPR ; only if active lanes need to be saved +// s_not exec, exec +// buffer_store_dword TmpVGPR ; save inactive lanes +// s_not exec, exec +struct SGPRSpillBuilder { + struct PerVGPRData { + unsigned PerVGPR; + unsigned NumVGPRs; + int64_t VGPRLanes; + }; + + // The SGPR to save + Register SuperReg; + MachineBasicBlock::iterator MI; + ArrayRef SplitParts; + unsigned NumSubRegs; + bool IsKill; + const DebugLoc &DL; + + /* When spilling to stack */ + // The SGPRs are written into this VGPR, which is then written to scratch + // (or vice versa for loads). + Register TmpVGPR = AMDGPU::NoRegister; + // Temporary spill slot to save TmpVGPR to. + int TmpVGPRIndex = 0; + // If TmpVGPR is live before the spill or if it is scavenged. + bool TmpVGPRLive = false; + // Scavenged SGPR to save EXEC. + Register SavedExecReg = AMDGPU::NoRegister; + // Stack index to write the SGPRs to. + int Index; + unsigned EltSize = 4; + + RegScavenger &RS; + MachineBasicBlock &MBB; + MachineFunction &MF; + SIMachineFunctionInfo &MFI; + const SIInstrInfo &TII; + const SIRegisterInfo &TRI; + bool IsWave32; + Register ExecReg; + unsigned MovOpc; + unsigned NotOpc; + + SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, + bool IsWave32, MachineBasicBlock::iterator MI, int Index, + RegScavenger &RS) + : SuperReg(MI->getOperand(0).getReg()), MI(MI), + IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index), + RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()), + MFI(*MF.getInfo()), TII(TII), TRI(TRI), + IsWave32(IsWave32) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); + SplitParts = TRI.getRegSplitParts(RC, EltSize); + NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + + if (IsWave32) { + ExecReg = AMDGPU::EXEC_LO; + MovOpc = AMDGPU::S_MOV_B32; + NotOpc = AMDGPU::S_NOT_B32; + } else { + ExecReg = AMDGPU::EXEC; + MovOpc = AMDGPU::S_MOV_B64; + NotOpc = AMDGPU::S_NOT_B64; + } + + assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); + assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && + SuperReg != AMDGPU::EXEC && "exec should never spill"); + } + + PerVGPRData getPerVGPRData() { + PerVGPRData Data; + Data.PerVGPR = IsWave32 ? 32 : 64; + Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR; + Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL; + return Data; + } + + // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is + // free. + // Writes these instructions if an SGPR can be scavenged: + // s_mov_b64 s[6:7], exec ; Save exec + // s_mov_b64 exec, 3 ; Wanted lanemask + // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot + // + // Writes these instructions if no SGPR can be scavenged: + // buffer_store_dword v0 ; Only if no free VGPR was found + // s_not_b64 exec, exec + // buffer_store_dword v0 ; Save inactive lanes + // ; exec stays inverted, it is flipped back in + // ; restore. + void prepare() { + // Scavenged temporary VGPR to use. It must be scavenged once for any number + // of spilled subregs. + // FIXME: The liveness analysis is limited and does not tell if a register + // is in use in lanes that are currently inactive. We can never be sure if + // a register as actually in use in another lane, so we need to save all + // used lanes of the chosen VGPR. + TmpVGPR = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false); + + // Reserve temporary stack slot + TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI); + if (TmpVGPR) { + // Found a register that is dead in the currently active lanes, we only + // need to spill inactive lanes. + TmpVGPRLive = false; + } else { + // Pick v0 because it doesn't make a difference. + TmpVGPR = AMDGPU::VGPR0; + TmpVGPRLive = true; + } + + // Try to scavenge SGPRs to save exec + assert(!SavedExecReg && "Exec is already saved, refuse to save again"); + const TargetRegisterClass &RC = + IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass; + RS.setRegUsed(SuperReg); + SavedExecReg = RS.scavengeRegister(&RC, MI, 0, false); + + int64_t VGPRLanes = getPerVGPRData().VGPRLanes; + + if (SavedExecReg) { + // Set exec to needed lanes + BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg); + auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes); + if (!TmpVGPRLive) + I.addReg(TmpVGPR, RegState::ImplicitDefine); + // Spill needed lanes + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); + } else { + // Spill active lanes + if (TmpVGPRLive) + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false, + /*IsKill*/ false); + // Spill inactive lanes + auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + if (!TmpVGPRLive) + I.addReg(TmpVGPR, RegState::ImplicitDefine); + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); + } + } + + // Writes these instructions if an SGPR can be scavenged: + // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot + // s_waitcnt vmcnt(0) ; If a free VGPR was found + // s_mov_b64 exec, s[6:7] ; Save exec + // + // Writes these instructions if no SGPR can be scavenged: + // buffer_load_dword v0 ; Restore inactive lanes + // s_waitcnt vmcnt(0) ; If a free VGPR was found + // s_not_b64 exec, exec + // buffer_load_dword v0 ; Only if no free VGPR was found + void restore() { + if (SavedExecReg) { + // Restore used lanes + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, + /*IsKill*/ false); + // Restore exec + auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg) + .addReg(SavedExecReg, RegState::Kill); + // Add an implicit use of the load so it is not dead. + // FIXME This inserts an unnecessary waitcnt + if (!TmpVGPRLive) { + I.addReg(TmpVGPR, RegState::Implicit); + } + } else { + // Restore inactive lanes + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, + /*IsKill*/ false); + auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + if (!TmpVGPRLive) { + I.addReg(TmpVGPR, RegState::Implicit); + } + // Restore active lanes + if (TmpVGPRLive) + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true); + } + } + + // Write TmpVGPR to memory or read TmpVGPR from memory. + // Either using a single buffer_load/store if exec is set to the needed mask + // or using + // buffer_load + // s_not exec, exec + // buffer_load + // s_not exec, exec + void readWriteTmpVGPR(unsigned Offset, bool IsLoad) { + if (SavedExecReg) { + // Spill needed lanes + TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); + } else { + // Spill active lanes + TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad, + /*IsKill*/ false); + // Spill inactive lanes + BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); + BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + } + } +}; + +} // namespace llvm + SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { @@ -1039,120 +1264,36 @@ } } -// Generate a VMEM access which loads or stores the VGPR containing an SGPR -// spill such that all the lanes set in VGPRLanes are loaded or stored. -// This generates exec mask manipulation and will use SGPRs available in MI -// or VGPR lanes in the VGPR to save and restore the exec mask. -void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, - int Index, int Offset, - unsigned EltSize, Register VGPR, - int64_t VGPRLanes, - RegScavenger *RS, - bool IsLoad) const { - MachineBasicBlock *MBB = MI->getParent(); - MachineFunction *MF = MBB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); - - Register SuperReg = MI->getOperand(0).getReg(); - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - unsigned FirstPart = Offset * 32; - unsigned ExecLane = 0; - - bool IsKill = MI->getOperand(0).isKill(); - const DebugLoc &DL = MI->getDebugLoc(); - - // Cannot handle load/store to EXEC - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - // On Wave32 only handle EXEC_LO. - // On Wave64 only update EXEC_HI if there is sufficent space for a copy. - bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI; - - unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - Register SavedExecReg; - - // Backup EXEC - if (OnlyExecLo) { - SavedExecReg = - NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[FirstPart + ExecLane])); - } else { - // If src/dst is an odd size it is possible subreg0 is not aligned. - for (; ExecLane < (NumSubRegs - 1); ++ExecLane) { - SavedExecReg = getMatchingSuperReg( - getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0, - &AMDGPU::SReg_64_XEXECRegClass); - if (SavedExecReg) - break; - } - } - assert(SavedExecReg); - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg); - - // Setup EXEC - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg).addImm(VGPRLanes); - +void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, + int Offset, bool IsLoad, + bool IsKill) const { // Load/store VGPR - MachineFrameInfo &FrameInfo = MF->getFrameInfo(); + MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo(); assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill); - Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF) - ? getBaseRegister() - : getFrameRegister(*MF); + Register FrameReg = + FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF) + ? getBaseRegister() + : getFrameRegister(SB.MF); Align Alignment = FrameInfo.getObjectAlign(Index); - MachinePointerInfo PtrInfo = - MachinePointerInfo::getFixedStack(*MF, Index); - MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index); + MachineMemOperand *MMO = SB.MF.getMachineMemOperand( PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore, - EltSize, Alignment); + SB.EltSize, Alignment); if (IsLoad) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; - buildSpillLoadStore(MI, Opc, - Index, - VGPR, false, - FrameReg, - Offset * EltSize, MMO, - RS); + buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, + Offset * SB.EltSize, MMO, &SB.RS); } else { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; - buildSpillLoadStore(MI, Opc, Index, VGPR, - IsKill, FrameReg, - Offset * EltSize, MMO, RS); + buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg, + Offset * SB.EltSize, MMO, &SB.RS); // This only ever adds one VGPR spill - MFI->addToSpilledVGPRs(1); - } - - // Restore EXEC - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg) - .addReg(SavedExecReg, getKillRegState(IsLoad || IsKill)); - - // Restore clobbered SGPRs - if (IsLoad) { - // Nothing to do; register will be overwritten - } else if (!IsKill) { - // Restore SGPRs from appropriate VGPR lanes - if (!OnlyExecLo) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), - getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1])) - .addReg(VGPR) - .addImm(ExecLane + 1); - } - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), - NumSubRegs == 1 ? SavedExecReg - : Register(getSubReg( - SuperReg, SplitParts[FirstPart + ExecLane]))) - .addReg(VGPR, RegState::Kill) - .addImm(ExecLane); + SB.MFI.addToSpilledVGPRs(1); } } @@ -1160,115 +1301,97 @@ int Index, RegScavenger *RS, bool OnlyToVGPR) const { - MachineBasicBlock *MBB = MI->getParent(); - MachineFunction *MF = MBB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); + SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, *RS); - ArrayRef VGPRSpills - = MFI->getSGPRToVGPRSpills(Index); + ArrayRef VGPRSpills = + SB.MFI.getSGPRToVGPRSpills(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; - const SIInstrInfo *TII = ST.getInstrInfo(); - - Register SuperReg = MI->getOperand(0).getReg(); - bool IsKill = MI->getOperand(0).isKill(); - const DebugLoc &DL = MI->getDebugLoc(); - - assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() && - SuperReg != MFI->getFrameOffsetReg())); - - assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - unsigned EltSize = 4; - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() && + SB.SuperReg != SB.MFI.getFrameOffsetReg())); if (SpillToVGPR) { - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - bool UseKill = IsKill && i == NumSubRegs - 1; + bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1; // Mark the "old value of vgpr" input undef only if this is the first sgpr // spill to this specific vgpr in the first basic block. - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) - .addReg(SubReg, getKillRegState(UseKill)) - .addImm(Spill.Lane) - .addReg(Spill.VGPR); + auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), + Spill.VGPR) + .addReg(SubReg, getKillRegState(UseKill)) + .addImm(Spill.Lane) + .addReg(Spill.VGPR); - if (i == 0 && NumSubRegs > 1) { + if (i == 0 && SB.NumSubRegs > 1) { // We may be spilling a super-register which is only partially defined, // and need to ensure later spills think the value is defined. - MIB.addReg(SuperReg, RegState::ImplicitDefine); + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } - if (NumSubRegs > 1) - MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit); + if (SB.NumSubRegs > 1) + MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit); // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to // it are fixed. } } else { - // Scavenged temporary VGPR to use. It must be scavenged once for any number - // of spilled subregs. - Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); - RS->setRegUsed(TmpVGPR); + SB.prepare(); - // SubReg carries the "Kill" flag when SubReg == SuperReg. - unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); + // SubReg carries the "Kill" flag when SubReg == SB.SuperReg. + unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); - unsigned PerVGPR = 32; - unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR; - int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL; + // Per VGPR helper data + auto PVD = SB.getPerVGPRData(); - for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) { + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { unsigned TmpVGPRFlags = RegState::Undef; // Write sub registers into the VGPR - for (unsigned i = Offset * PerVGPR, - e = std::min((Offset + 1) * PerVGPR, NumSubRegs); + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); MachineInstrBuilder WriteLane = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), TmpVGPR) + BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), + SB.TmpVGPR) .addReg(SubReg, SubKillState) - .addImm(i % PerVGPR) - .addReg(TmpVGPR, TmpVGPRFlags); + .addImm(i % PVD.PerVGPR) + .addReg(SB.TmpVGPR, TmpVGPRFlags); TmpVGPRFlags = 0; // There could be undef components of a spilled super register. // TODO: Can we detect this and skip the spill? - if (NumSubRegs > 1) { - // The last implicit use of the SuperReg carries the "Kill" flag. + if (SB.NumSubRegs > 1) { + // The last implicit use of the SB.SuperReg carries the "Kill" flag. unsigned SuperKillState = 0; - if (i + 1 == NumSubRegs) - SuperKillState |= getKillRegState(IsKill); - WriteLane.addReg(SuperReg, RegState::Implicit | SuperKillState); + if (i + 1 == SB.NumSubRegs) + SuperKillState |= getKillRegState(SB.IsKill); + WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); } } // Write out VGPR - buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes, - RS, false); + SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false); } + + SB.restore(); } MI->eraseFromParent(); - MFI->addToSpilledSGPRs(NumSubRegs); + SB.MFI.addToSpilledSGPRs(SB.NumSubRegs); return true; } @@ -1276,75 +1399,59 @@ int Index, RegScavenger *RS, bool OnlyToVGPR) const { - MachineFunction *MF = MI->getParent()->getParent(); - MachineBasicBlock *MBB = MI->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); + SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, *RS); - ArrayRef VGPRSpills - = MFI->getSGPRToVGPRSpills(Index); + ArrayRef VGPRSpills = + SB.MFI.getSGPRToVGPRSpills(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; - const SIInstrInfo *TII = ST.getInstrInfo(); - const DebugLoc &DL = MI->getDebugLoc(); - - Register SuperReg = MI->getOperand(0).getReg(); - - assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - unsigned EltSize = 4; - - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - - ArrayRef SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - if (SpillToVGPR) { - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(Spill.VGPR) - .addImm(Spill.Lane); - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); + auto MIB = + BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } } else { - Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); - RS->setRegUsed(TmpVGPR); + SB.prepare(); - unsigned PerVGPR = 32; - unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR; - int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL; + // Per VGPR helper data + auto PVD = SB.getPerVGPRData(); - for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) { + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { // Load in VGPR data - buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes, - RS, true); + SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true); // Unpack lanes - for (unsigned i = Offset * PerVGPR, - e = std::min((Offset + 1) * PerVGPR, NumSubRegs); + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); bool LastSubReg = (i + 1 == e); - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(TmpVGPR, getKillRegState(LastSubReg)) - .addImm(i); - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); + auto MIB = BuildMI(SB.MBB, MI, SB.DL, + SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) + .addImm(i); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } } + + SB.restore(); } MI->eraseFromParent(); diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -112,6 +112,9 @@ ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] ; GCN: s_cmp_lg_u32 +; VMEM: buffer_store_dword +; VMEM: buffer_store_dword +; VMEM: buffer_store_dword ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill ; GCN-NEXT: s_cbranch_scc1 [[LOOP]] diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -12,17 +12,23 @@ ; SPILL-TO-VGPR: v_writelane_b32 v40, s31, 1 ; NO-SPILL-TO-VGPR: v_mov_b32_e32 v0, s33 ; NO-SPILL-TO-VGPR: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; NO-SPILL-TO-VGPR: s_mov_b64 exec, s[4:5] +; NO-SPILL-TO-VGPR: s_mov_b64 s[6:7], exec +; NO-SPILL-TO-VGPR: s_mov_b64 exec, 3 +; NO-SPILL-TO-VGPR: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR: v_writelane_b32 v1, s30, 0 ; NO-SPILL-TO-VGPR: v_writelane_b32 v1, s31, 1 ; NO-SPILL-TO-VGPR: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; NO-SPILL-TO-VGPR: buffer_load_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR: s_waitcnt vmcnt(0) +; NO-SPILL-TO-VGPR: s_mov_b64 exec, s[6:7] ; GCN: s_swappc_b64 s[30:31], s[4:5] ; SPILL-TO-VGPR: v_readlane_b32 s4, v40, 0 ; SPILL-TO-VGPR: v_readlane_b32 s5, v40, 1 -; NO-SPILL-TO-VGPR: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v1, 0 -; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v1, 1 +; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v2, 0 +; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v2, 1 ; SPILL-TO-VGPR: v_readlane_b32 s33, v40, 2 ; NO-SPILL-TO-VGPR: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -761,12 +761,15 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; GCN-NEXT: v_writelane_b32 v0, s2, 0 ; GCN-NEXT: v_writelane_b32 v0, s3, 1 -; GCN-NEXT: s_mov_b64 s[2:3], exec -; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[2:3] +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s0, s1 @@ -842,13 +845,16 @@ ; GCN-NEXT: v_readlane_b32 s17, v31, 61 ; GCN-NEXT: v_readlane_b32 s18, v31, 62 ; GCN-NEXT: v_readlane_b32 s19, v31, 63 -; GCN-NEXT: s_mov_b64 s[0:1], exec +; GCN-NEXT: s_mov_b64 s[2:3], exec ; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s0, v0, 0 ; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_mov_b64 exec, s[2:3] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND @@ -887,5 +893,248 @@ ret void } +; Same as @no_vgprs_last_sgpr_spill, some SGPR spills must go to memory. +; Additionally, v0 is live throughout the function. +define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { +; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0: +; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s54, -1 +; GCN-NEXT: s_mov_b32 s55, 0xe8f000 +; GCN-NEXT: s_add_u32 s52, s52, s3 +; GCN-NEXT: s_addc_u32 s53, s53, 0 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 0 +; GCN-NEXT: v_writelane_b32 v31, s5, 1 +; GCN-NEXT: v_writelane_b32 v31, s6, 2 +; GCN-NEXT: v_writelane_b32 v31, s7, 3 +; GCN-NEXT: v_writelane_b32 v31, s8, 4 +; GCN-NEXT: v_writelane_b32 v31, s9, 5 +; GCN-NEXT: v_writelane_b32 v31, s10, 6 +; GCN-NEXT: v_writelane_b32 v31, s11, 7 +; GCN-NEXT: v_writelane_b32 v31, s12, 8 +; GCN-NEXT: v_writelane_b32 v31, s13, 9 +; GCN-NEXT: v_writelane_b32 v31, s14, 10 +; GCN-NEXT: v_writelane_b32 v31, s15, 11 +; GCN-NEXT: v_writelane_b32 v31, s16, 12 +; GCN-NEXT: v_writelane_b32 v31, s17, 13 +; GCN-NEXT: v_writelane_b32 v31, s18, 14 +; GCN-NEXT: v_writelane_b32 v31, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 16 +; GCN-NEXT: v_writelane_b32 v31, s5, 17 +; GCN-NEXT: v_writelane_b32 v31, s6, 18 +; GCN-NEXT: v_writelane_b32 v31, s7, 19 +; GCN-NEXT: v_writelane_b32 v31, s8, 20 +; GCN-NEXT: v_writelane_b32 v31, s9, 21 +; GCN-NEXT: v_writelane_b32 v31, s10, 22 +; GCN-NEXT: v_writelane_b32 v31, s11, 23 +; GCN-NEXT: v_writelane_b32 v31, s12, 24 +; GCN-NEXT: v_writelane_b32 v31, s13, 25 +; GCN-NEXT: v_writelane_b32 v31, s14, 26 +; GCN-NEXT: v_writelane_b32 v31, s15, 27 +; GCN-NEXT: v_writelane_b32 v31, s16, 28 +; GCN-NEXT: v_writelane_b32 v31, s17, 29 +; GCN-NEXT: v_writelane_b32 v31, s18, 30 +; GCN-NEXT: v_writelane_b32 v31, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 32 +; GCN-NEXT: v_writelane_b32 v31, s5, 33 +; GCN-NEXT: v_writelane_b32 v31, s6, 34 +; GCN-NEXT: v_writelane_b32 v31, s7, 35 +; GCN-NEXT: v_writelane_b32 v31, s8, 36 +; GCN-NEXT: v_writelane_b32 v31, s9, 37 +; GCN-NEXT: v_writelane_b32 v31, s10, 38 +; GCN-NEXT: v_writelane_b32 v31, s11, 39 +; GCN-NEXT: v_writelane_b32 v31, s12, 40 +; GCN-NEXT: v_writelane_b32 v31, s13, 41 +; GCN-NEXT: v_writelane_b32 v31, s14, 42 +; GCN-NEXT: v_writelane_b32 v31, s15, 43 +; GCN-NEXT: v_writelane_b32 v31, s16, 44 +; GCN-NEXT: v_writelane_b32 v31, s17, 45 +; GCN-NEXT: v_writelane_b32 v31, s18, 46 +; GCN-NEXT: v_writelane_b32 v31, s19, 47 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 48 +; GCN-NEXT: v_writelane_b32 v31, s5, 49 +; GCN-NEXT: v_writelane_b32 v31, s6, 50 +; GCN-NEXT: v_writelane_b32 v31, s7, 51 +; GCN-NEXT: v_writelane_b32 v31, s8, 52 +; GCN-NEXT: v_writelane_b32 v31, s9, 53 +; GCN-NEXT: v_writelane_b32 v31, s10, 54 +; GCN-NEXT: v_writelane_b32 v31, s11, 55 +; GCN-NEXT: v_writelane_b32 v31, s12, 56 +; GCN-NEXT: v_writelane_b32 v31, s13, 57 +; GCN-NEXT: v_writelane_b32 v31, s14, 58 +; GCN-NEXT: v_writelane_b32 v31, s15, 59 +; GCN-NEXT: v_writelane_b32 v31, s16, 60 +; GCN-NEXT: v_writelane_b32 v31, s17, 61 +; GCN-NEXT: v_writelane_b32 v31, s18, 62 +; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[2:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 +; GCN-NEXT: v_writelane_b32 v0, s2, 0 +; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_lg_u32 s0, s1 +; GCN-NEXT: s_cbranch_scc1 BB3_2 +; GCN-NEXT: ; %bb.1: ; %bb0 +; GCN-NEXT: v_readlane_b32 s36, v31, 32 +; GCN-NEXT: v_readlane_b32 s37, v31, 33 +; GCN-NEXT: v_readlane_b32 s38, v31, 34 +; GCN-NEXT: v_readlane_b32 s39, v31, 35 +; GCN-NEXT: v_readlane_b32 s40, v31, 36 +; GCN-NEXT: v_readlane_b32 s41, v31, 37 +; GCN-NEXT: v_readlane_b32 s42, v31, 38 +; GCN-NEXT: v_readlane_b32 s43, v31, 39 +; GCN-NEXT: v_readlane_b32 s44, v31, 40 +; GCN-NEXT: v_readlane_b32 s45, v31, 41 +; GCN-NEXT: v_readlane_b32 s46, v31, 42 +; GCN-NEXT: v_readlane_b32 s47, v31, 43 +; GCN-NEXT: v_readlane_b32 s48, v31, 44 +; GCN-NEXT: v_readlane_b32 s49, v31, 45 +; GCN-NEXT: v_readlane_b32 s50, v31, 46 +; GCN-NEXT: v_readlane_b32 s51, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s16, v31, 0 +; GCN-NEXT: v_readlane_b32 s17, v31, 1 +; GCN-NEXT: v_readlane_b32 s18, v31, 2 +; GCN-NEXT: v_readlane_b32 s19, v31, 3 +; GCN-NEXT: v_readlane_b32 s20, v31, 4 +; GCN-NEXT: v_readlane_b32 s21, v31, 5 +; GCN-NEXT: v_readlane_b32 s22, v31, 6 +; GCN-NEXT: v_readlane_b32 s23, v31, 7 +; GCN-NEXT: v_readlane_b32 s24, v31, 8 +; GCN-NEXT: v_readlane_b32 s25, v31, 9 +; GCN-NEXT: v_readlane_b32 s26, v31, 10 +; GCN-NEXT: v_readlane_b32 s27, v31, 11 +; GCN-NEXT: v_readlane_b32 s28, v31, 12 +; GCN-NEXT: v_readlane_b32 s29, v31, 13 +; GCN-NEXT: v_readlane_b32 s30, v31, 14 +; GCN-NEXT: v_readlane_b32 s31, v31, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def v0 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[16:31] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s4, v31, 48 +; GCN-NEXT: v_readlane_b32 s5, v31, 49 +; GCN-NEXT: v_readlane_b32 s6, v31, 50 +; GCN-NEXT: v_readlane_b32 s7, v31, 51 +; GCN-NEXT: v_readlane_b32 s8, v31, 52 +; GCN-NEXT: v_readlane_b32 s9, v31, 53 +; GCN-NEXT: v_readlane_b32 s10, v31, 54 +; GCN-NEXT: v_readlane_b32 s11, v31, 55 +; GCN-NEXT: v_readlane_b32 s12, v31, 56 +; GCN-NEXT: v_readlane_b32 s13, v31, 57 +; GCN-NEXT: v_readlane_b32 s14, v31, 58 +; GCN-NEXT: v_readlane_b32 s15, v31, 59 +; GCN-NEXT: v_readlane_b32 s16, v31, 60 +; GCN-NEXT: v_readlane_b32 s17, v31, 61 +; GCN-NEXT: v_readlane_b32 s18, v31, 62 +; GCN-NEXT: v_readlane_b32 s19, v31, 63 +; GCN-NEXT: s_mov_b64 s[2:3], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s0, v1, 0 +; GCN-NEXT: v_readlane_b32 s1, v1, 1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_mov_b64 exec, s[2:3] +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[36:51] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use v0 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: BB3_2: ; %ret +; GCN-NEXT: s_endpgm + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "", "~{v[8:15]}" () #0 + call void asm sideeffect "", "~{v[16:23]}" () #0 + call void asm sideeffect "", "~{v[24:27]}"() #0 + call void asm sideeffect "", "~{v[28:29]}"() #0 + call void asm sideeffect "", "~{v30}"() #0 + + %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 + %cmp = icmp eq i32 %in, 0 + br i1 %cmp, label %bb0, label %ret + +bb0: + %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0 + call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0 + call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0 + br label %ret + +ret: + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" } diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -1,183 +1,9 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,MUBUF %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN32,MUBUF %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,FLATSCR %s - - -# CHECK-LABEL: name: check_spill - -# FLATSCR: $sgpr33 = S_MOV_B32 0 -# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc -# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc - -# S32 with kill -# CHECK: V_WRITELANE -# CHECK: $sgpr12 = S_MOV_B32 $exec_lo -# CHECK: $exec_lo = S_MOV_B32 1 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 4 -# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12 - -# S32 without kill -# CHECK: V_WRITELANE -# CHECK: $sgpr12 = S_MOV_B32 $exec_lo -# CHECK: $exec_lo = S_MOV_B32 1 -# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 4 -# CHECK: $sgpr12 = V_READLANE - -# S64 with kill -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 3 -# GCN64: $exec = S_MOV_B64 3 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 8 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S64 without kill -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 3 -# GCN64: $exec = S_MOV_B64 3 -# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 8 -# GCN32: $exec_lo = S_MOV_B32 $sgpr12 -# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13 -# GCN64: $sgpr13 = V_READLANE -# CHECK: $sgpr12 = V_READLANE - -# S96 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 7 -# GCN64: $exec = S_MOV_B64 7 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 16 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S128 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 15 -# GCN64: $exec = S_MOV_B64 15 -# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 28 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S160 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 31 -# GCN64: $exec = S_MOV_B64 31 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 44 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 44 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S256 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 255 -# GCN64: $exec = S_MOV_B64 255 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 64 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 64 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S512 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 65535 -# GCN64: $exec = S_MOV_B64 65535 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 96 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 96 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 - -# S1024 -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# CHECK: V_WRITELANE -# GCN32: $sgpr64 = S_MOV_B32 $exec_lo -# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 4294967295 -# GCN64: $exec = S_MOV_B64 4294967295 -# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 -# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 160 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 -# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65 +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN64-MUBUF %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN32-MUBUF %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN64-FLATSCR %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -filetype=obj -verify-machineinstrs -start-before=prologepilog %s -o /dev/null +# Check not crashing when emitting ISA --- | @@ -228,6 +54,850 @@ bb.0: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 + ; GCN32-LABEL: name: check_spill + ; GCN32: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 + ; GCN32: $sgpr33 = S_MOV_B32 0 + ; GCN32: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 15, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 31, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 255, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 65535, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 + ; GCN32: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32: $sgpr64 = S_OR_SAVEEXEC_B32 4294967295, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 -1 + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 + ; FLATSCR-LABEL: name: check_spill + ; FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 + ; FLATSCR: $sgpr33 = S_MOV_B32 0 + ; FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc + ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc + ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo + ; FLATSCR: $exec_lo = S_MOV_B32 1 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: $exec_lo = S_MOV_B32 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 + ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 + ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 + ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo + ; FLATSCR: $exec_lo = S_MOV_B32 1 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 + ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 3 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 3 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 7 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 15 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 31 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 255 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 65535 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 + ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec + ; FLATSCR: $exec = S_MOV_B64 4294967295 + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 -1 + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65 + ; GCN64-MUBUF-LABEL: name: check_spill + ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 + ; GCN64-MUBUF: $sgpr33 = S_MOV_B32 0 + ; GCN64-MUBUF: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr30 = S_MOV_B32 4294967295, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr0 + ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr1 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN64-MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr1 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr2 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN64-MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr2 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr3 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN64-MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr3 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 7, implicit-def $vgpr4 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr4, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr13, 1, $vgpr4, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr14, 2, $vgpr4, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN64-MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr4 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 15, implicit-def $vgpr5 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr13, 1, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr14, 2, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr15, 3, $vgpr5, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN64-MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr5 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 31, implicit-def $vgpr6 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr13, 1, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr14, 2, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr15, 3, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr16, 4, $vgpr6, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN64-MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr6 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 255, implicit-def $vgpr7 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr13, 1, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr14, 2, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr15, 3, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr16, 4, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr17, 5, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr18, 6, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr19, 7, $vgpr7, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN64-MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr7 + ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 65535, implicit-def $vgpr8 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr13, 1, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr14, 2, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr15, 3, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr16, 4, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr17, 5, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr18, 6, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr19, 7, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr20, 8, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr21, 9, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr22, 10, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr23, 11, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr24, 12, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr25, 13, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr26, 14, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr27, 15, $vgpr8, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN64-MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr8 + ; GCN64-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF: $exec = S_MOV_B64 4294967295, implicit-def $vgpr9 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr65, 1, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr66, 2, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr67, 3, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr68, 4, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr69, 5, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr70, 6, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr71, 7, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr72, 8, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr73, 9, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr74, 10, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr75, 11, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr76, 12, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr77, 13, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr78, 14, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr79, 15, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr80, 16, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr81, 17, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr82, 18, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr83, 19, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr84, 20, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr85, 21, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr86, 22, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr87, 23, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr88, 24, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr89, 25, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr90, 26, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr91, 27, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr92, 28, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr93, 29, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr94, 30, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr95, 31, $vgpr9, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN64-MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr9 + ; GCN32-MUBUF-LABEL: name: check_spill + ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 + ; GCN32-MUBUF: $sgpr33 = S_MOV_B32 0 + ; GCN32-MUBUF: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 + ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr0 + ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32-MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr1 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32-MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr2 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr3 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32-MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr3 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 7, implicit-def $vgpr4 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr4, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr13, 1, $vgpr4, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr14, 2, $vgpr4, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN32-MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr4 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 15, implicit-def $vgpr5 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr13, 1, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr14, 2, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr15, 3, $vgpr5, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN32-MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr5 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 31, implicit-def $vgpr6 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr13, 1, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr14, 2, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr15, 3, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr16, 4, $vgpr6, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN32-MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr6 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 255, implicit-def $vgpr7 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr13, 1, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr14, 2, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr15, 3, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr16, 4, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr17, 5, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr18, 6, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr19, 7, $vgpr7, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN32-MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr7 + ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr8 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr13, 1, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr14, 2, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr15, 3, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr16, 4, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr17, 5, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr18, 6, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr19, 7, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr20, 8, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr21, 9, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr22, 10, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr23, 11, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr24, 12, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr25, 13, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr26, 14, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr27, 15, $vgpr8, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN32-MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr8 + ; GCN32-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr9 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr65, 1, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr66, 2, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr67, 3, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr68, 4, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr69, 5, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr70, 6, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr71, 7, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr72, 8, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr73, 9, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr74, 10, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr75, 11, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr76, 12, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr77, 13, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr78, 14, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr79, 15, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr80, 16, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr81, 17, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr82, 18, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr83, 19, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr84, 20, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr85, 21, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr86, 22, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr87, 23, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr88, 24, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr89, 25, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr90, 26, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr91, 27, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr92, 28, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr93, 29, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr94, 30, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr95, 31, $vgpr9, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN32-MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr9 + ; GCN64-FLATSCR-LABEL: name: check_spill + ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 + ; GCN64-FLATSCR: $sgpr33 = S_MOV_B32 0 + ; GCN64-FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc + ; GCN64-FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc + ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr0 + ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr1 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN64-FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr1 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr2 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; GCN64-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr2 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr3 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; GCN64-FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr3 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 7, implicit-def $vgpr4 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr4 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr4, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR: $vgpr4 = V_WRITELANE_B32 $sgpr13, 1, $vgpr4, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR: $vgpr4 = V_WRITELANE_B32 $sgpr14, 2, $vgpr4, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5) + ; GCN64-FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr4 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 15, implicit-def $vgpr5 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr13, 1, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr14, 2, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr15, 3, $vgpr5, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; GCN64-FLATSCR: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr5 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 31, implicit-def $vgpr6 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr13, 1, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr14, 2, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr15, 3, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr16, 4, $vgpr6, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5) + ; GCN64-FLATSCR: $vgpr6 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr6 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 255, implicit-def $vgpr7 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr13, 1, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr14, 2, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr15, 3, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr16, 4, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr17, 5, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr18, 6, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr19, 7, $vgpr7, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5) + ; GCN64-FLATSCR: $vgpr7 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr7 + ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 65535, implicit-def $vgpr8 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr8, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr13, 1, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr14, 2, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr15, 3, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr16, 4, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr17, 5, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr18, 6, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr19, 7, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr20, 8, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr21, 9, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr22, 10, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr23, 11, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr24, 12, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr25, 13, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr26, 14, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr27, 15, $vgpr8, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr8, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5) + ; GCN64-FLATSCR: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr8 + ; GCN64-FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF + ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR: $exec = S_MOV_B64 4294967295, implicit-def $vgpr9 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr9, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr65, 1, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr66, 2, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr67, 3, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr68, 4, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr69, 5, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr70, 6, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr71, 7, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr72, 8, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr73, 9, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr74, 10, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr75, 11, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr76, 12, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr77, 13, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr78, 14, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr79, 15, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr80, 16, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr81, 17, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr82, 18, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr83, 19, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr84, 20, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr85, 21, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr86, 22, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr87, 23, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr88, 24, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr89, 25, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr90, 26, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr91, 27, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr92, 28, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr93, 29, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr94, 30, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr95, 31, $vgpr9, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr9, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5) + ; GCN64-FLATSCR: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr9 renamable $sgpr12 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 @@ -258,161 +928,6 @@ renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 - -# CHECK-LABEL: name: check_reload - -# FLATSCR: $sgpr33 = S_MOV_B32 0 -# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc -# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc - -# S32 -# CHECK: $sgpr12 = S_MOV_B32 $exec_lo -# CHECK: $exec_lo = S_MOV_B32 1 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4 -# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12 -# CHECK: $sgpr12 = V_READLANE - -# S64 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 3 -# GCN64: $exec = S_MOV_B64 3 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE - -# S96 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 7 -# GCN64: $exec = S_MOV_B64 7 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 16 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE - -# S128 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 15 -# GCN64: $exec = S_MOV_B64 15 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 28 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE - -# S160 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 31 -# GCN64: $exec = S_MOV_B64 31 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 44 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE -# CHECK: $sgpr16 = V_READLANE - -# S256 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 255 -# GCN64: $exec = S_MOV_B64 255 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 64 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE -# CHECK: $sgpr16 = V_READLANE -# CHECK: $sgpr17 = V_READLANE -# CHECK: $sgpr18 = V_READLANE -# CHECK: $sgpr19 = V_READLANE - -# S512 -# GCN32: $sgpr12 = S_MOV_B32 $exec_lo -# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 65535 -# GCN64: $exec = S_MOV_B64 65535 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 96 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 -# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 -# CHECK: $sgpr12 = V_READLANE -# CHECK: $sgpr13 = V_READLANE -# CHECK: $sgpr14 = V_READLANE -# CHECK: $sgpr15 = V_READLANE -# CHECK: $sgpr16 = V_READLANE -# CHECK: $sgpr17 = V_READLANE -# CHECK: $sgpr18 = V_READLANE -# CHECK: $sgpr19 = V_READLANE -# CHECK: $sgpr20 = V_READLANE -# CHECK: $sgpr21 = V_READLANE -# CHECK: $sgpr22 = V_READLANE -# CHECK: $sgpr23 = V_READLANE -# CHECK: $sgpr24 = V_READLANE -# CHECK: $sgpr25 = V_READLANE -# CHECK: $sgpr26 = V_READLANE -# CHECK: $sgpr27 = V_READLANE - -# S1024 -# GCN32: $sgpr64 = S_MOV_B32 $exec_lo -# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 4294967295 -# GCN64: $exec = S_MOV_B64 4294967295 -# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 -# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 -# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65 -# CHECK: $sgpr64 = V_READLANE -# CHECK: $sgpr65 = V_READLANE -# CHECK: $sgpr66 = V_READLANE -# CHECK: $sgpr67 = V_READLANE -# CHECK: $sgpr68 = V_READLANE -# CHECK: $sgpr69 = V_READLANE -# CHECK: $sgpr70 = V_READLANE -# CHECK: $sgpr71 = V_READLANE -# CHECK: $sgpr72 = V_READLANE -# CHECK: $sgpr73 = V_READLANE -# CHECK: $sgpr74 = V_READLANE -# CHECK: $sgpr75 = V_READLANE -# CHECK: $sgpr76 = V_READLANE -# CHECK: $sgpr77 = V_READLANE -# CHECK: $sgpr78 = V_READLANE -# CHECK: $sgpr79 = V_READLANE -# CHECK: $sgpr80 = V_READLANE -# CHECK: $sgpr81 = V_READLANE -# CHECK: $sgpr82 = V_READLANE -# CHECK: $sgpr83 = V_READLANE -# CHECK: $sgpr84 = V_READLANE -# CHECK: $sgpr85 = V_READLANE -# CHECK: $sgpr86 = V_READLANE -# CHECK: $sgpr87 = V_READLANE -# CHECK: $sgpr88 = V_READLANE -# CHECK: $sgpr89 = V_READLANE -# CHECK: $sgpr90 = V_READLANE -# CHECK: $sgpr91 = V_READLANE -# CHECK: $sgpr92 = V_READLANE -# CHECK: $sgpr93 = V_READLANE -# CHECK: $sgpr94 = V_READLANE -# CHECK: $sgpr95 = V_READLANE - --- name: check_reload tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll --- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -7,14 +7,16 @@ ; Make sure we are handling hazards correctly. ; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 -; SGPR-NEXT: s_mov_b64 exec, s[0:1] ; SGPR-NEXT: s_waitcnt vmcnt(0) ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0 ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1 ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2 ; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3 -; SGPR-NEXT: s_nop 4 -; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0 +; SGPR-NEXT: buffer_load_dword [[VHI]], off, s[96:99], 0 +; SGPR-NEXT: s_waitcnt vmcnt(0) +; SGPR-NEXT: s_mov_b64 exec, s[4:5] +; SGPR-NEXT: s_nop 1 +; SGPR-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; ALL: s_endpgm define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -14,11 +14,11 @@ ; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]] +; TOVMEM: s_mov_b64 [[COPY_EXEC:s\[[0-9]+:[0-9]+\]]], exec +; TOVMEM: s_mov_b64 exec, 1 ; TOVMEM: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 -; TOVMEM: s_mov_b32 [[COPY_EXEC_LO:s[0-9]+]], exec_lo -; TOVMEM: s_mov_b32 exec_lo, 1 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Spill -; TOVMEM: s_mov_b32 exec_lo, [[COPY_EXEC_LO]] +; TOVMEM: s_mov_b64 exec, [[COPY_EXEC]] ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -46,6 +46,7 @@ ; CHECK-LABEL: test_limited_sgpr ; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] +; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32 ; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] ; GFX6: NumSgprs: 48 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +; Spill an SGPR to scratch without having spare SGPRs available to save exec + +define amdgpu_kernel void @test() #1 { +; GFX10-LABEL: test: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s10, -1 +; GFX10-NEXT: s_mov_b32 s11, 0x31e16000 +; GFX10-NEXT: s_add_u32 s8, s8, s1 +; GFX10-NEXT: s_addc_u32 s9, s9, 0 +; GFX10-NEXT: ;;#ASMSTART +; GFX10-NEXT: ; def s[0:7] +; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: ;;#ASMSTART +; GFX10-NEXT: ; def s[8:12] +; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_not_b64 exec, exec +; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX10-NEXT: v_writelane_b32 v0, s8, 0 +; GFX10-NEXT: v_writelane_b32 v0, s9, 1 +; GFX10-NEXT: v_writelane_b32 v0, s10, 2 +; GFX10-NEXT: v_writelane_b32 v0, s11, 3 +; GFX10-NEXT: v_writelane_b32 v0, s12, 4 +; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_not_b64 exec, exec +; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_not_b64 exec, exec +; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_not_b64 exec, exec +; GFX10-NEXT: ;;#ASMSTART +; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: ;;#ASMSTART +; GFX10-NEXT: ; use s[0:7] +; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_mov_b64 s[6:7], exec +; GFX10-NEXT: s_mov_b64 exec, 31 +; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readlane_b32 s0, v0, 0 +; GFX10-NEXT: v_readlane_b32 s1, v0, 1 +; GFX10-NEXT: v_readlane_b32 s2, v0, 2 +; GFX10-NEXT: v_readlane_b32 s3, v0, 3 +; GFX10-NEXT: v_readlane_b32 s4, v0, 4 +; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b64 exec, s[6:7] +; GFX10-NEXT: ;;#ASMSTART +; GFX10-NEXT: ; use s[0:3] +; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_endpgm + %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () #0 + %wide.sgpr2 = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0 + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0 + call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr2) #0 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" } diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -46,27 +46,31 @@ ; GFX9: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $vcc = IMPLICIT_DEF + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc - ; GFX9: $vcc = S_MOV_B64 $exec - ; GFX9: $exec = S_MOV_B64 3 - ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $exec = S_MOV_B64 $vcc - ; GFX9: $vcc_hi = V_READLANE_B32 $vgpr0, 1 - ; GFX9: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0 - ; GFX9: $vcc = IMPLICIT_DEF - ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc - ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc - ; GFX9: $vcc = S_MOV_B64 $exec - ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $exec = S_MOV_B64 killed $vcc - ; GFX9: $vcc = S_MOV_B64 $exec - ; GFX9: $exec = S_MOV_B64 3 - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; GFX9: $exec = S_MOV_B64 killed $vcc - ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc - ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr0 + ; GFX9: $vcc = IMPLICIT_DEF + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc + ; GFX9: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr1 + ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr2 + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr2, 0, implicit-def $vcc + ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr2, 1 + ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr2 ; GFX10-LABEL: name: check_vcc ; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX10: $sgpr33 = S_MOV_B32 0 @@ -77,27 +81,31 @@ ; GFX10: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10: $vcc = IMPLICIT_DEF + ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc - ; GFX10: $vcc = S_MOV_B64 $exec - ; GFX10: $exec = S_MOV_B64 3 - ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $exec = S_MOV_B64 $vcc - ; GFX10: $vcc_hi = V_READLANE_B32 $vgpr0, 1 - ; GFX10: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0 - ; GFX10: $vcc = IMPLICIT_DEF - ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc - ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc - ; GFX10: $vcc = S_MOV_B64 $exec - ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $exec = S_MOV_B64 killed $vcc - ; GFX10: $vcc = S_MOV_B64 $exec - ; GFX10: $exec = S_MOV_B64 3 - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; GFX10: $exec = S_MOV_B64 killed $vcc - ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc - ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr0 + ; GFX10: $vcc = IMPLICIT_DEF + ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc + ; GFX10: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX10: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr1 + ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr2 + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr2, 0, implicit-def $vcc + ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr2, 1 + ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr2 $vcc = IMPLICIT_DEF SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32