Index: llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -37,6 +37,7 @@
   const SIRegisterInfo *TRI = nullptr;
   const SIInstrInfo *TII = nullptr;
   LiveIntervals *LIS = nullptr;
+  SlotIndexes *Indexes = nullptr;
 
   // Save and Restore blocks of the current function. Typically there is a
   // single save block, unless Windows EH funclets are involved.
@@ -74,7 +75,7 @@
 
 /// Insert spill code for the callee-saved registers used in the function.
 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
-                           ArrayRef<CalleeSavedInfo> CSI,
+                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
                            LiveIntervals *LIS) {
   MachineFunction &MF = *SaveBlock.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -103,13 +104,14 @@
       TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                               RC, TRI);
 
-      if (LIS) {
+      if (Indexes) {
         assert(std::distance(MIS.begin(), I) == 1);
         MachineInstr &Inst = *std::prev(I);
+        Indexes->insertMachineInstrInMaps(Inst);
+      }
 
-        LIS->InsertMachineInstrInMaps(Inst);
+      if (LIS)
         LIS->removeAllRegUnitsForPhysReg(Reg);
-      }
     }
   }
 }
@@ -117,7 +119,7 @@
 /// Insert restore code for the callee-saved registers used in the function.
 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                               MutableArrayRef<CalleeSavedInfo> CSI,
-                              LiveIntervals *LIS) {
+                              SlotIndexes *Indexes, LiveIntervals *LIS) {
   MachineFunction &MF = *RestoreBlock.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -141,11 +143,13 @@
 
       // Insert in reverse order.  loadRegFromStackSlot can insert
      // multiple instructions.
-      if (LIS) {
+      if (Indexes) {
         MachineInstr &Inst = *std::prev(I);
-        LIS->InsertMachineInstrInMaps(Inst);
-        LIS->removeAllRegUnitsForPhysReg(Reg);
+        Indexes->insertMachineInstrInMaps(Inst);
       }
+
+      if (LIS)
+        LIS->removeAllRegUnitsForPhysReg(Reg);
     }
   }
 }
@@ -228,14 +232,14 @@
 
     if (!CSI.empty()) {
       for (MachineBasicBlock *SaveBlock : SaveBlocks)
-        insertCSRSaves(*SaveBlock, CSI, LIS);
+        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
 
       // Add live ins to save blocks.
       assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
       updateLiveness(MF, CSI);
 
       for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
-        insertCSRRestores(*RestoreBlock, CSI, LIS);
+        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
       return true;
     }
   }
@@ -249,6 +253,7 @@
   TRI = &TII->getRegisterInfo();
 
   LIS = getAnalysisIfAvailable<LiveIntervals>();
+  Indexes = getAnalysisIfAvailable<SlotIndexes>();
 
   assert(SaveBlocks.empty() && RestoreBlocks.empty());
 
@@ -293,8 +298,8 @@
             assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
             if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
               NewReservedRegs = true;
-              bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI,
-                                                                     nullptr, LIS);
+              bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+                  MI, FI, nullptr, Indexes, LIS);
               (void)Spilled;
               assert(Spilled && "failed to spill SGPR to VGPR when allocated");
               SpillFIs.set(FI);
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -137,14 +137,12 @@
                                bool IsLoad, bool IsKill = true) const;
 
   /// If \p OnlyToVGPR is true, this will only succeed if this
-  bool spillSGPR(MachineBasicBlock::iterator MI,
-                 int FI, RegScavenger *RS,
-                 LiveIntervals *LIS = nullptr,
+  bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
+                 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
                  bool OnlyToVGPR = false) const;
 
-  bool restoreSGPR(MachineBasicBlock::iterator MI,
-                   int FI, RegScavenger *RS,
-                   LiveIntervals *LIS = nullptr,
+  bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
+                   SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
                    bool OnlyToVGPR = false) const;
 
   bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
@@ -157,6 +155,7 @@
 
   bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
                                           int FI, RegScavenger *RS,
+                                          SlotIndexes *Indexes = nullptr,
                                           LiveIntervals *LIS = nullptr) const;
 
   StringRef getRegAsmName(MCRegister Reg) const override;
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1666,11 +1666,9 @@
   }
 }
 
-bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
-                               int Index,
-                               RegScavenger *RS,
-                               LiveIntervals *LIS,
-                               bool OnlyToVGPR) const {
+bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
+                               RegScavenger *RS, SlotIndexes *Indexes,
+                               LiveIntervals *LIS, bool OnlyToVGPR) const {
   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
 
   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index);
@@ -1702,11 +1700,11 @@
               .addReg(SubReg, getKillRegState(UseKill))
               .addImm(Spill.Lane)
               .addReg(Spill.VGPR);
-      if (LIS) {
+      if (Indexes) {
         if (i == 0)
-          LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
+          Indexes->replaceMachineInstrInMaps(*MI, *MIB);
         else
-          LIS->InsertMachineInstrInMaps(*MIB);
+          Indexes->insertMachineInstrInMaps(*MIB);
       }
 
       if (i == 0 && SB.NumSubRegs > 1) {
@@ -1751,11 +1749,11 @@
             .addReg(SB.TmpVGPR, TmpVGPRFlags);
       TmpVGPRFlags = 0;
 
-      if (LIS) {
+      if (Indexes) {
         if (i == 0)
-          LIS->ReplaceMachineInstrInMaps(*MI, *WriteLane);
+          Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
        else
-          LIS->InsertMachineInstrInMaps(*WriteLane);
+          Indexes->insertMachineInstrInMaps(*WriteLane);
       }
 
       // There could be undef components of a spilled super register.
@@ -1785,11 +1783,9 @@
   return true;
 }
 
-bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
-                                 int Index,
-                                 RegScavenger *RS,
-                                 LiveIntervals *LIS,
-                                 bool OnlyToVGPR) const {
+bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
+                                 RegScavenger *RS, SlotIndexes *Indexes,
+                                 LiveIntervals *LIS, bool OnlyToVGPR) const {
   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
 
   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index);
@@ -1811,13 +1807,12 @@
           .addImm(Spill.Lane);
       if (SB.NumSubRegs > 1 && i == 0)
         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
-      if (LIS) {
+      if (Indexes) {
         if (i == e - 1)
-          LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
+          Indexes->replaceMachineInstrInMaps(*MI, *MIB);
         else
-          LIS->InsertMachineInstrInMaps(*MIB);
+          Indexes->insertMachineInstrInMaps(*MIB);
       }
-
     }
   } else {
     SB.prepare();
@@ -1845,11 +1840,11 @@
           .addImm(i);
       if (SB.NumSubRegs > 1 && i == 0)
         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
-      if (LIS) {
+      if (Indexes) {
         if (i == e - 1)
-          LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
+          Indexes->replaceMachineInstrInMaps(*MI, *MIB);
         else
-          LIS->InsertMachineInstrInMaps(*MIB);
+          Indexes->insertMachineInstrInMaps(*MIB);
       }
     }
   }
@@ -1938,10 +1933,8 @@
 /// a VGPR and the stack slot can be safely eliminated when all other users are
 /// handled.
 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
-    MachineBasicBlock::iterator MI,
-    int FI,
-    RegScavenger *RS,
-    LiveIntervals *LIS) const {
+    MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
+    SlotIndexes *Indexes, LiveIntervals *LIS) const {
   switch (MI->getOpcode()) {
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
@@ -1953,7 +1946,7 @@
   case AMDGPU::SI_SPILL_S96_SAVE:
   case AMDGPU::SI_SPILL_S64_SAVE:
   case AMDGPU::SI_SPILL_S32_SAVE:
-    return spillSGPR(MI, FI, RS, LIS, true);
+    return spillSGPR(MI, FI, RS, Indexes, LIS, true);
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
@@ -1964,7 +1957,7 @@
   case AMDGPU::SI_SPILL_S96_RESTORE:
   case AMDGPU::SI_SPILL_S64_RESTORE:
   case AMDGPU::SI_SPILL_S32_RESTORE:
-    return restoreSGPR(MI, FI, RS, LIS, true);
+    return restoreSGPR(MI, FI, RS, Indexes, LIS, true);
   default:
     llvm_unreachable("not an SGPR spill instruction");
   }
Index: llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -sgpr-regalloc=fast -vgpr-regalloc=fast -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Make sure there's no verifier error from improperly updated
+; SlotIndexes if regalloc fast is manually used.
+
+declare void @foo()
+
+define amdgpu_kernel void @kernel() {
+; GCN-LABEL: kernel:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GCN-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GCN-NEXT:    s_mov_b32 s38, -1
+; GCN-NEXT:    s_mov_b32 s39, 0xe00000
+; GCN-NEXT:    v_writelane_b32 v40, s4, 0
+; GCN-NEXT:    s_add_u32 s36, s36, s11
+; GCN-NEXT:    v_writelane_b32 v40, s5, 1
+; GCN-NEXT:    s_addc_u32 s37, s37, 0
+; GCN-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; GCN-NEXT:    v_readlane_b32 s0, v40, 0
+; GCN-NEXT:    s_mov_b32 s13, s9
+; GCN-NEXT:    s_mov_b32 s12, s8
+; GCN-NEXT:    v_readlane_b32 s1, v40, 1
+; GCN-NEXT:    s_add_u32 s8, s0, 36
+; GCN-NEXT:    s_addc_u32 s9, s1, 0
+; GCN-NEXT:    s_getpc_b64 s[0:1]
+; GCN-NEXT:    s_add_u32 s0, s0, foo@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
+; GCN-NEXT:    s_mov_b32 s14, s10
+; GCN-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GCN-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GCN-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GCN-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GCN-NEXT:    s_mov_b32 s32, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN-NEXT:    s_endpgm
+  call void @foo()
+  ret void
+}