Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -265,6 +265,12 @@
   "Dummy feature to disable assembler instructions"
 >;
 
+def FeatureSpillUserPtr : SubtargetFeature<"spill-userptr",
+  "EnableSpillUserPtr",
+  "true",
+  "Enable spilling of VGPRs to scratch memory address passed in userdata 0 and 1"
+>;
+
 class SubtargetFeatureGeneration <string Value,
                                   list<SubtargetFeature> Implies> :
         SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
Index: lib/Target/AMDGPU/SIFrameLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIFrameLowering.cpp
+++ lib/Target/AMDGPU/SIFrameLowering.cpp
@@ ... @@
-    unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
-    unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
 
-    // Use relocations to get the pointer, and setup the other bits manually.
     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-    BuildMI(MBB, I, DL, SMovB32, Rsrc0)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD0")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
-    BuildMI(MBB, I, DL, SMovB32, Rsrc1)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD1")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    if (ST.hasSpillUserPtr()) {
+      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+      const MCInstrDesc &MoveDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+      unsigned sgpr01 = TRI->getMatchingSuperReg(AMDGPU::SGPR0, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+      MRI.addLiveIn(AMDGPU::SGPR0);
+      MBB.addLiveIn(AMDGPU::SGPR0);
+
+      BuildMI(MBB, I, DL, MoveDwordX2, Rsrc01)
+        .addReg(sgpr01, RegState::Undef)
+        .addImm(0);
+      BuildMI(MBB, I, DL, MoveDwordX2, Rsrc01)
+        .addReg(Rsrc01, RegState::Undef, AMDGPU::sub0_sub1)
+        .addImm(0);
+    } else {
+      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+      // Use relocations to get the pointer, and setup the other bits manually.
+      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    }
 
     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
       .addImm(Rsrc23 & 0xffffffff)
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -110,6 +110,8 @@
   bool WorkItemIDY : 1;
   bool WorkItemIDZ : 1;
 
+  bool SpillUserPtr : 1; // Spill to userdata 0/1
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -248,6 +250,10 @@
     return WorkItemIDZ;
   }
 
+  bool hasSpillUserPtr() const {
+    return SpillUserPtr;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -81,7 +81,8 @@
     PrivateSegmentWaveByteOffset(false),
     WorkItemIDX(false),
     WorkItemIDY(false),
-    WorkItemIDZ(false) {
+    WorkItemIDZ(false),
+    SpillUserPtr(false) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   const Function *F = MF.getFunction();
 
@@ -127,7 +128,8 @@
 
     if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
-  }
+  } else if (ST.hasSpillUserPtr())
+    SpillUserPtr = true;
 
   // We don't need to worry about accessing spills with flat instructions.
   // TODO: On VI where we must use flat for global, we should be able to omit
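
Note on subtarget plumbing (not shown in the hunks above): the SIFrameLowering
prologue code and the SIMachineFunctionInfo constructor both query
ST.hasSpillUserPtr() on the subtarget, but the SISubtarget side of the feature
is not part of this excerpt. Below is a minimal sketch of the conventional
SubtargetFeature wiring that call implies; the placement and surrounding code
are assumptions, not taken from the patch:

    // AMDGPUSubtarget.h (sketch, assumed placement): backing field for the
    // "spill-userptr" feature. The tablegen-generated ParseSubtargetFeatures()
    // sets it when the feature string contains "+spill-userptr", because
    // "EnableSpillUserPtr" is the attribute name given in FeatureSpillUserPtr.
    bool EnableSpillUserPtr;

    // Accessor used as ST.hasSpillUserPtr() in the hunks above.
    bool hasSpillUserPtr() const { return EnableSpillUserPtr; }

    // AMDGPUSubtarget.cpp (sketch): initialize alongside the other feature
    // flags in the constructor's member-initializer list.
    EnableSpillUserPtr(false),

With that wiring in place, the path can be exercised with something like
"llc -march=amdgcn -mattr=+spill-userptr", which makes the prologue initialize
the first two scratch-resource dwords with S_LOAD_DWORDX2 through the pointer
passed in user SGPRs 0 and 1 instead of patching them in via the
SCRATCH_RSRC_DWORD0/1 relocations.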