Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -307,20 +307,52 @@ assert(!ST.isAmdCodeObjectV2()); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - BuildMI(MBB, I, DL, SMovB32, Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, SMovB32, Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + if (MFI->hasSpillUserPtr()) { // Dwords 0-1 of the descriptor are copied straight from sgpr[0:1]. + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + unsigned Sgpr01 = TRI->getMatchingSuperReg(AMDGPU::SGPR0, AMDGPU::sub0, &AMDGPU::SReg_64RegClass); // 64-bit register whose sub0 is SGPR0. + const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); + + MRI.addLiveIn(Sgpr01); + MBB.addLiveIn(Sgpr01); + BuildMI(MBB, I, DL, Mov64, Rsrc01) + .addReg(Sgpr01) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else if (MFI->hasSpillUserPtrLoad()) { // Dwords 0-1 of the descriptor are loaded from the address held in sgpr[0:1]. + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + unsigned Sgpr01 = TRI->getMatchingSuperReg(AMDGPU::SGPR0, AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); + MRI.addLiveIn(Sgpr01); + MBB.addLiveIn(Sgpr01); + + PointerType *PtrTy = PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), AMDGPUAS::CONSTANT_ADDRESS); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); + auto MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 
+ MachineMemOperand::MODereferenceable, 8, 4); // Size is 8 bytes (two dwords); base alignment 4 assumes a dword-aligned scalar load - NOTE(review): confirm. + BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) + .addReg(Sgpr01) + .addImm(0) // offset - NOTE(review): operand order assumed to be (sbase, offset, glc); confirm against the instruction definition. + .addImm(0) // glc (same assumption as above). + .addMemOperand(MMO) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else { // Neither flag set: dwords 0 and 1 come from the SCRATCH_RSRC_DWORD0/1 external symbols. + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + + BuildMI(MBB, I, DL, SMovB32, Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + } BuildMI(MBB, I, DL, SMovB32, Rsrc2) .addImm(Rsrc23 & 0xffffffff) Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -112,6 +112,9 @@ bool WorkItemIDY : 1; bool WorkItemIDZ : 1; + bool SpillUserPtr : 1; // Spill buffer base is the 64-bit address held in sgpr[0:1]. + bool SpillUserPtrLoad : 1; // Spill buffer base is the 64-bit value loaded from the first 2 dwords of the buffer addressed by sgpr[0:1]. + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -251,6 +254,14 @@ return WorkItemIDZ; } + bool hasSpillUserPtr() const { // Set from the "amdgpu-spill-bufsgpr01" function attribute. + return SpillUserPtr; + } + + bool hasSpillUserPtrLoad() const { // Set from the "amdgpu-spill-bufsgpr01-load" function attribute. + return SpillUserPtrLoad; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -77,7 +77,9 @@ PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), - WorkItemIDZ(false) { + WorkItemIDZ(false), + SpillUserPtr(false), + SpillUserPtrLoad(false) { const SISubtarget &ST = MF.getSubtarget(); const Function *F = 
MF.getFunction(); @@ -126,6 +128,11 @@ if (F->hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; + } else { + if (F->hasFnAttribute("amdgpu-spill-bufsgpr01")) + SpillUserPtr = true; + if (F->hasFnAttribute("amdgpu-spill-bufsgpr01-load")) // Not exclusive with the attribute above; frame lowering tests hasSpillUserPtr() first. + SpillUserPtrLoad = true; } // We don't need to worry about accessing spills with flat instructions.