diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -118,6 +118,7 @@
 static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                              const SIMachineFunctionInfo &FuncInfo,
                              LivePhysRegs &LiveRegs, MachineFunction &MF,
+                             MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I, Register SpillReg,
                              int FI) {
   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
@@ -129,7 +130,7 @@
       PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
       FrameInfo.getObjectAlign(FI));
   LiveRegs.addReg(SpillReg);
-  TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, true,
+  TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
                           FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                           &LiveRegs);
   LiveRegs.removeReg(SpillReg);
@@ -139,6 +140,7 @@
                                const SIRegisterInfo &TRI,
                                const SIMachineFunctionInfo &FuncInfo,
                                LivePhysRegs &LiveRegs, MachineFunction &MF,
+                               MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, Register SpillReg,
                                int FI) {
   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
@@ -149,7 +151,7 @@
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
       FrameInfo.getObjectAlign(FI));
-  TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, false,
+  TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
                           FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                           &LiveRegs);
 }
@@ -745,7 +747,8 @@
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                              /*IsProlog*/ true);
 
-    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR, *Reg.FI);
+    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
+                     *Reg.FI);
   }
 
   // VGPRs used for Whole Wave Mode
@@ -759,7 +762,7 @@
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                              /*IsProlog*/ true);
 
-    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
+    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
   }
 
   if (ScratchExecCopy) {
@@ -785,7 +788,7 @@
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
         .addReg(FramePtrReg);
 
-    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                      FramePtrFI);
   }
 
@@ -803,7 +806,7 @@
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
         .addReg(BasePtrReg);
 
-    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                      BasePtrFI);
   }
 
@@ -996,7 +999,7 @@
           MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
       if (!TmpVGPR)
         report_fatal_error("failed to find free scratch register");
-      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                          FramePtrFI);
       BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
           .addReg(TmpVGPR, RegState::Kill);
@@ -1022,7 +1025,7 @@
           MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
       if (!TmpVGPR)
         report_fatal_error("failed to find free scratch register");
-      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                          BasePtrFI);
       BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
           .addReg(TmpVGPR, RegState::Kill);
@@ -1048,7 +1051,7 @@
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                              /*IsProlog*/ false);
 
-    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR,
+    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
                        *Reg.FI);
   }
 
@@ -1062,7 +1065,7 @@
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                              /*IsProlog*/ false);
 
-    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
+    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
   }
 
   if (ScratchExecCopy) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -349,7 +349,8 @@
   // When lowering spill pseudos, the RegScavenger should be set.
   // For creating spill instructions during frame lowering, where no scavenger
   // is available, LiveRegs can be used.
-  void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
+  void buildSpillLoadStore(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
                            int Index, Register ValueReg, bool ValueIsKill,
                            MCRegister ScratchOffsetReg, int64_t InstrOffset,
                            MachineMemOperand *MMO, RegScavenger *RS,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -915,13 +915,11 @@
 }
 
 static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
+                                           MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                           int Index,
-                                           unsigned Lane,
-                                           unsigned ValueReg,
-                                           bool IsKill) {
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineFunction *MF = MI->getParent()->getParent();
+                                           int Index, unsigned Lane,
+                                           unsigned ValueReg, bool IsKill) {
+  MachineFunction *MF = MBB.getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
@@ -939,8 +937,8 @@
   unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
 
-  auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
-               .addReg(Src, getKillRegState(IsKill));
+  auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+                 .addReg(Src, getKillRegState(IsKill));
   MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
   return MIB;
 }
@@ -964,7 +962,7 @@
     return false;
 
   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
-  if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
+  if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
     return true;
 
   MachineInstrBuilder NewMI =
@@ -1021,20 +1019,19 @@
 }
 
 void SIRegisterInfo::buildSpillLoadStore(
-    MachineBasicBlock::iterator MI, unsigned LoadStoreOp, int Index,
-    Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg,
-    int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS,
-    LivePhysRegs *LiveRegs) const {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
+    MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
+    RegScavenger *RS, LivePhysRegs *LiveRegs) const {
   assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
 
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineFunction *MF = MI->getParent()->getParent();
+  MachineFunction *MF = MBB.getParent();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const MachineFrameInfo &MFI = MF->getFrameInfo();
   const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
   const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
-  const DebugLoc &DL = MI->getDebugLoc();
+  const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
 
   bool IsStore = Desc->mayStore();
   bool IsFlat = TII->isFLATScratch(LoadStoreOp);
 
@@ -1114,10 +1111,9 @@
       report_fatal_error("could not scavenge SGPR to spill in entry function");
 
     if (ScratchOffsetReg == AMDGPU::NoRegister) {
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset)
-        .addImm(Offset);
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
     } else {
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
         .addReg(ScratchOffsetReg)
         .addImm(Offset);
     }
@@ -1170,7 +1166,7 @@
       Register Sub = IsSubReg
              ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
              : ValueReg;
-      auto MIB = spillVGPRtoAGPR(ST, MI, Index, Lane, Sub, IsKill);
+      auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
       if (!MIB.getInstr())
         break;
       if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
@@ -1216,9 +1212,9 @@
         RS->setRegUsed(TmpReg);
       }
       if (IsStore) {
-        auto AccRead = BuildMI(*MBB, MI, DL,
-                               TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
-          .addReg(SubReg, getKillRegState(IsKill));
+        auto AccRead = BuildMI(MBB, MI, DL,
+                               TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
+                           .addReg(SubReg, getKillRegState(IsKill));
         if (NeedSuperRegDef)
           AccRead.addReg(ValueReg, RegState::ImplicitDefine);
         AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
@@ -1231,9 +1227,9 @@
           MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
                                    commonAlignment(Alignment, RemRegOffset));
 
-      auto MIB = BuildMI(*MBB, MI, DL, *Desc)
-                     .addReg(SubReg,
-                             getDefRegState(!IsStore) | getKillRegState(IsKill));
+      auto MIB =
+          BuildMI(MBB, MI, DL, *Desc)
+              .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
 
       if (!IsFlat)
         MIB.addReg(FuncInfo->getScratchRSrcReg());
@@ -1254,9 +1250,9 @@
         MIB.addReg(ValueReg, RegState::ImplicitDefine);
 
       if (!IsStore && TmpReg != AMDGPU::NoRegister) {
-        MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
+        MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
                       FinalReg)
-          .addReg(TmpReg, RegState::Kill);
+                  .addReg(TmpReg, RegState::Kill);
         MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
       }
 
@@ -1266,7 +1262,7 @@
 
   if (ScratchOffsetRegDelta != 0) {
     // Subtract the offset we added to the ScratchOffset register.
-    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
+    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
         .addReg(SOffset)
         .addImm(ScratchOffsetRegDelta);
   }
@@ -1293,12 +1289,12 @@
   if (IsLoad) {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                           : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
+    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
                         Offset * SB.EltSize, MMO, SB.RS);
   } else {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                           : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
+    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
                         Offset * SB.EltSize, MMO, SB.RS);
     // This only ever adds one VGPR spill
     SB.MFI.addToSpilledVGPRs(1);
@@ -1573,13 +1569,11 @@
 
       unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                             : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
-      buildSpillLoadStore(MI, Opc,
-            Index,
-            VData->getReg(), VData->isKill(),
-            FrameReg,
-            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
-            *MI->memoperands_begin(),
-            RS);
+      auto *MBB = MI->getParent();
+      buildSpillLoadStore(
+          *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+          *MI->memoperands_begin(), RS);
       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
       MI->eraseFromParent();
       break;
@@ -1609,13 +1603,11 @@
 
       unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                             : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
-      buildSpillLoadStore(MI, Opc,
-            Index,
-            VData->getReg(), VData->isKill(),
-            FrameReg,
-            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
-            *MI->memoperands_begin(),
-            RS);
+      auto *MBB = MI->getParent();
+      buildSpillLoadStore(
+          *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+          *MI->memoperands_begin(), RS);
      MI->eraseFromParent();
      break;
    }
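
Note (illustrative, not part of the patch): passing the MachineBasicBlock explicitly, together with the guarded lookup "MI != MBB.end() ? MI->getDebugLoc() : DebugLoc()", appears to make an end-of-block iterator a legal insertion point for buildSpillLoadStore; the old signature derived both the block and the DebugLoc from MI, which an end() iterator cannot provide. Below is a minimal C++ sketch of such a caller, modeled on buildPrologSpill from this patch. The helper name spillAtBlockEnd and the exact include set are assumptions; the buildSpillLoadStore call itself matches the signature introduced here.

#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"

using namespace llvm;

// Hypothetical caller: spill SpillReg to frame index FI at the *end* of MBB.
// With the old interface this would have dereferenced an end() iterator via
// MI->getParent() / MI->getDebugLoc().
static void spillAtBlockEnd(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                            const SIMachineFunctionInfo &FuncInfo,
                            LivePhysRegs &LiveRegs, MachineFunction &MF,
                            MachineBasicBlock &MBB, Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  // Mirror buildPrologSpill: mark the value live across the store so the
  // LiveRegs-based scratch-register search cannot hand it back out.
  LiveRegs.addReg(SpillReg);
  TRI.buildSpillLoadStore(MBB, MBB.end(), Opc, FI, SpillReg, /*IsKill=*/true,
                          FuncInfo.getStackPtrOffsetReg(), /*InstrOffset=*/0,
                          MMO, /*RS=*/nullptr, &LiveRegs);
  LiveRegs.removeReg(SpillReg);
}

Only the store variant is sketched; an end-of-block reload would mirror buildEpilogRestore with SCRATCH_LOAD_DWORD_SADDR / BUFFER_LOAD_DWORD_OFFSET and MachineMemOperand::MOLoad.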