Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -265,6 +265,12 @@
   "Dummy feature to disable assembler instructions"
 >;
 
+def FeatureSpillUserPtr : SubtargetFeature<"spill-userptr",
+  "EnableSpillUserPtr",
+  "true",
+  "Enable spilling of VGPRs to the scratch memory address passed in userdata 0 and 1"
+>;
+
 class SubtargetFeatureGeneration <string Value,
                                   list<SubtargetFeature> Implies> :
         SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
Index: lib/Target/AMDGPU/SIFrameLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIFrameLowering.cpp
+++ lib/Target/AMDGPU/SIFrameLowering.cpp
@@ ... @@
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
-    unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
-    unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
 
-    // Use relocations to get the pointer, and setup the other bits manually.
     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-    BuildMI(MBB, I, DL, SMovB32, Rsrc0)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD0")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
-    BuildMI(MBB, I, DL, SMovB32, Rsrc1)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD1")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    if (ST.hasSpillUserPtr()) {
+      assert(MFI->hasSpillUserPtr());
+      unsigned SpillUserPtrUserSGPR = MFI->getSpillUserPtrUserSGPR();
+      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+      const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64);
+      MRI.addLiveIn(SpillUserPtrUserSGPR);
+      MBB.addLiveIn(SpillUserPtrUserSGPR);
+
+      BuildMI(MBB, I, DL, SMovB64, Rsrc01)
+        .addReg(SpillUserPtrUserSGPR, RegState::Kill);
+    } else {
+      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+      // Use relocations to get the pointer, and setup the other bits manually.
+      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    }
 
     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
       .addImm(Rsrc23 & 0xffffffff)
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -735,6 +735,12 @@
     CCInfo.AllocateReg(FlatScratchInitReg);
   }
 
+  if (Info->hasSpillUserPtr()) {
+    unsigned SpillUserPtrReg = Info->addSpillUserPtr(*TRI);
+    MF.addLiveIn(SpillUserPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(SpillUserPtrReg);
+  }
+
   if (!AMDGPU::isShader(CallConv))
     analyzeFormalArgumentsCompute(CCInfo, Ins);
   else
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -49,6 +49,9 @@
   unsigned GridWorkGroupCountYUserSGPR;
   unsigned GridWorkGroupCountZUserSGPR;
 
+  // Input registers set up for spilling to userdata 0 and 1.
+  unsigned SpillUserPtrUserSGPR;
+
   // System SGPRs in allocation order.
   unsigned WorkGroupIDXSystemSGPR;
   unsigned WorkGroupIDYSystemSGPR;
@@ -112,6 +115,8 @@
   bool WorkItemIDY : 1;
   bool WorkItemIDZ : 1;
 
+  bool SpillUserPtr : 1; // Spill to userdata 0/1
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -147,6 +152,7 @@
   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
   unsigned addDispatchID(const SIRegisterInfo &TRI);
   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+  unsigned addSpillUserPtr(const SIRegisterInfo &TRI);
 
   // Add system SGPRs.
   unsigned addWorkGroupIDX() {
@@ -251,6 +257,10 @@
     return WorkItemIDZ;
   }
 
+  bool hasSpillUserPtr() const {
+    return SpillUserPtr;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
@@ -287,6 +297,10 @@
     return QueuePtrUserSGPR;
   }
 
+  unsigned getSpillUserPtrUserSGPR() const {
+    return SpillUserPtrUserSGPR;
+  }
+
   bool hasSpilledSGPRs() const {
     return HasSpilledSGPRs;
   }
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -41,6 +41,7 @@
     GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
     GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
     GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
+    SpillUserPtrUserSGPR(AMDGPU::NoRegister),
     WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
     WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
     WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
@@ -77,7 +78,8 @@
     PrivateSegmentWaveByteOffset(false),
     WorkItemIDX(false),
     WorkItemIDY(false),
-    WorkItemIDZ(false) {
+    WorkItemIDZ(false),
+    SpillUserPtr(false) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   const Function *F = MF.getFunction();
@@ -126,7 +128,8 @@
     if (F->hasFnAttribute("amdgpu-dispatch-id"))
       DispatchID = true;
-  }
+  } else if (ST.hasSpillUserPtr())
+    SpillUserPtr = true;
 
   // We don't need to worry about accessing spills with flat instructions.
   // TODO: On VI where we must use flat for global, we should be able to omit
@@ -182,6 +185,13 @@
   return FlatScratchInitUserSGPR;
 }
 
+unsigned SIMachineFunctionInfo::addSpillUserPtr(const SIRegisterInfo &TRI) {
+  SpillUserPtrUserSGPR = TRI.getMatchingSuperReg(
+    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+  NumUserSGPRs += 2;
+  return SpillUserPtrUserSGPR;
+}
+
 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
   MachineFunction *MF,
   unsigned FrameIndex,
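
Usage sketch (assuming only the standard LLVM subtarget-feature plumbing that
SubtargetFeature<"spill-userptr", ...> hooks into; kernel.ll is a hypothetical
input file): the new path can be exercised by enabling the feature on the
command line, e.g.

  llc -march=amdgcn -mattr=+spill-userptr kernel.ll -o kernel.s

or per function via a "target-features"="+spill-userptr" attribute in the IR.
Without the feature, code generation keeps the existing behavior of
materializing the scratch resource words via the SCRATCH_RSRC_DWORD0/1
relocations, as in the else branch of the SIFrameLowering hunk above.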