diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -393,28 +393,29 @@ const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo(); uint16_t KernelCodeProperties = 0; + const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo(); - if (MFI.hasPrivateSegmentBuffer()) { + if (UserSGPRInfo.hasPrivateSegmentBuffer()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } - if (MFI.hasDispatchPtr()) { + if (UserSGPRInfo.hasDispatchPtr()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; } - if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) { + if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; } - if (MFI.hasKernargSegmentPtr()) { + if (UserSGPRInfo.hasKernargSegmentPtr()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; } - if (MFI.hasDispatchID()) { + if (UserSGPRInfo.hasDispatchID()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; } - if (MFI.hasFlatScratchInit()) { + if (UserSGPRInfo.hasFlatScratchInit()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; } @@ -1165,27 +1166,28 @@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize(true))); - if (MFI->hasPrivateSegmentBuffer()) { + const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo(); + if (UserSGPRInfo.hasPrivateSegmentBuffer()) { Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } - if (MFI->hasDispatchPtr()) + if (UserSGPRInfo.hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (MFI->hasQueuePtr() && CodeObjectVersion < 
AMDGPU::AMDHSA_COV5) + if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; - if (MFI->hasKernargSegmentPtr()) + if (UserSGPRInfo.hasKernargSegmentPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; - if (MFI->hasDispatchID()) + if (UserSGPRInfo.hasDispatchID()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; - if (MFI->hasFlatScratchInit()) + if (UserSGPRInfo.hasFlatScratchInit()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; - if (MFI->hasDispatchPtr()) + if (UserSGPRInfo.hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; if (STM.isXNACKEnabled()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -455,27 +455,28 @@ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) { // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
- if (Info.hasPrivateSegmentBuffer()) { + const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo(); + if (UserSGPRInfo.hasPrivateSegmentBuffer()) { Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI); MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass); CCInfo.AllocateReg(PrivateSegmentBufferReg); } - if (Info.hasDispatchPtr()) { + if (UserSGPRInfo.hasDispatchPtr()) { Register DispatchPtrReg = Info.addDispatchPtr(TRI); MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(DispatchPtrReg); } const Module *M = MF.getFunction().getParent(); - if (Info.hasQueuePtr() && + if (UserSGPRInfo.hasQueuePtr() && AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); } - if (Info.hasKernargSegmentPtr()) { + if (UserSGPRInfo.hasKernargSegmentPtr()) { MachineRegisterInfo &MRI = MF.getRegInfo(); Register InputPtrReg = Info.addKernargSegmentPtr(TRI); const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); @@ -486,13 +487,13 @@ CCInfo.AllocateReg(InputPtrReg); } - if (Info.hasDispatchID()) { + if (UserSGPRInfo.hasDispatchID()) { Register DispatchIDReg = Info.addDispatchID(TRI); MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(DispatchIDReg); } - if (Info.hasFlatScratchInit()) { + if (UserSGPRInfo.hasFlatScratchInit()) { Register FlatScratchInitReg = Info.addFlatScratchInit(TRI); MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(FlatScratchInitReg); @@ -597,15 +598,16 @@ SmallVector ArgLocs; CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext()); + const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo(); - if (Info->hasImplicitBufferPtr()) { + if (UserSGPRInfo.hasImplicitBufferPtr()) { Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); 
CCInfo.AllocateReg(ImplicitBufferPtrReg); } // FIXME: This probably isn't defined for mesa - if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) { + if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) { Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI); MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(FlatScratchInitReg); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -1103,7 +1103,7 @@ Offset += 8; // Skipped. } - if (MFI.hasQueuePtr()) + if (MFI.getUserSGPRInfo().hasQueuePtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -185,7 +185,7 @@ // // If we only have implicit uses of flat_scr on flat instructions, it is not // really needed. 
- if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() && + if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() && (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) && !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) && !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -17,6 +17,7 @@ #include "AMDGPULegalizerInfo.h" #include "AMDGPURegisterBankInfo.h" #include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" #include "R600Subtarget.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -692,7 +693,7 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo(); - return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit()); + return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit()); } unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const { @@ -770,25 +771,27 @@ getReservedNumSGPRs(MF)); } -static unsigned getMaxNumPreloadedSGPRs() { +static constexpr unsigned getMaxNumPreloadedSGPRs() { + using USI = GCNUserSGPRUsageInfo; // Max number of user SGPRs - unsigned MaxUserSGPRs = 4 + // private segment buffer - 2 + // Dispatch ptr - 2 + // queue ptr - 2 + // kernel segment ptr - 2 + // dispatch ID - 2 + // flat scratch init - 2; // Implicit buffer ptr + const unsigned MaxUserSGPRs = + USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) + + USI::getNumUserSGPRForField(USI::DispatchPtrID) + + USI::getNumUserSGPRForField(USI::QueuePtrID) + + USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) + + USI::getNumUserSGPRForField(USI::DispatchIdID) + + USI::getNumUserSGPRForField(USI::FlatScratchInitID) + + USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID); // Max number of system SGPRs - unsigned MaxSystemSGPRs = 
1 + // WorkGroupIDX - 1 + // WorkGroupIDY - 1 + // WorkGroupIDZ - 1 + // WorkGroupInfo - 1; // private segment wave byte offset + const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX + 1 + // WorkGroupIDY + 1 + // WorkGroupIDZ + 1 + // WorkGroupInfo + 1; // private segment wave byte offset // Max number of synthetic SGPRs - unsigned SyntheticSGPRs = 1; // LDSKernelId + const unsigned SyntheticSGPRs = 1; // LDSKernelId return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs; } @@ -1018,3 +1021,73 @@ else return static_cast(TM.getSubtarget(F)); } + +GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F, + const GCNSubtarget &ST) { + const CallingConv::ID CC = F.getCallingConv(); + const bool IsKernel = + CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL; + // FIXME: Should have analysis or something rather than attribute to detect + // calls. + const bool HasCalls = F.hasFnAttribute("amdgpu-calls"); + // FIXME: This attribute is a hack, we just need an analysis on the function + // to look for allocas. + const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects"); + + if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)) + KernargSegmentPtr = true; + + bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); + if (IsAmdHsaOrMesa && !ST.enableFlatScratch()) + PrivateSegmentBuffer = true; + else if (ST.isMesaGfxShader(F)) + ImplicitBufferPtr = true; + + if (!AMDGPU::isGraphics(CC)) { + if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr")) + DispatchPtr = true; + + // FIXME: Can this always be disabled with < COv5? + if (!F.hasFnAttribute("amdgpu-no-queue-ptr")) + QueuePtr = true; + + if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) + DispatchID = true; + } + + // TODO: This could be refined a lot. The attribute is a poor way of + // detecting calls or stack objects that may require it before argument + // lowering. 
+ if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) && + (IsAmdHsaOrMesa || ST.enableFlatScratch()) && + (HasCalls || HasStackObjects || ST.enableFlatScratch()) && + !ST.flatScratchIsArchitected()) { + FlatScratchInit = true; + } +} + +unsigned GCNUserSGPRUsageInfo::getNumUsedUserSGPRs() const { + unsigned NumUserSGPRs = 0; + if (hasImplicitBufferPtr()) + NumUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID); + + if (hasPrivateSegmentBuffer()) + NumUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID); + + if (hasDispatchPtr()) + NumUserSGPRs += getNumUserSGPRForField(DispatchPtrID); + + if (hasQueuePtr()) + NumUserSGPRs += getNumUserSGPRForField(QueuePtrID); + + if (hasKernargSegmentPtr()) + NumUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID); + + if (hasDispatchID()) + NumUserSGPRs += getNumUserSGPRForField(DispatchIdID); + + if (hasFlatScratchInit()) + NumUserSGPRs += getNumUserSGPRForField(FlatScratchInitID); + + return NumUserSGPRs; +} diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -22,6 +22,7 @@ #include "SIInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" @@ -1378,6 +1379,79 @@ } }; +class GCNUserSGPRUsageInfo { +public: + unsigned getNumUsedUserSGPRs() const; + + bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; } + + bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; } + + bool hasDispatchPtr() const { return DispatchPtr; } + + bool hasQueuePtr() const { return QueuePtr; } + + bool hasKernargSegmentPtr() const { return KernargSegmentPtr; } + + bool hasDispatchID() const { return DispatchID; } + + bool hasFlatScratchInit() const { return FlatScratchInit; } + + enum UserSGPRID : unsigned { + ImplicitBufferPtrID 
= 0, + PrivateSegmentBufferID = 1, + DispatchPtrID = 2, + QueuePtrID = 3, + KernargSegmentPtrID = 4, + DispatchIdID = 5, + FlatScratchInitID = 6, + PrivateSegmentSizeID = 7 + }; + + // Returns the size in number of SGPRs for preload user SGPR field. + static constexpr unsigned getNumUserSGPRForField(UserSGPRID ID) { + switch (ID) { + case ImplicitBufferPtrID: + return 2; + case PrivateSegmentBufferID: + return 4; + case DispatchPtrID: + return 2; + case QueuePtrID: + return 2; + case KernargSegmentPtrID: + return 2; + case DispatchIdID: + return 2; + case FlatScratchInitID: + return 2; + case PrivateSegmentSizeID: + return 1; + } + llvm_unreachable("Unknown UserSGPRID."); + } + + GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST); + +private: + // Private memory buffer + // Compute directly in sgpr[0:1] + // Other shaders indirect 64-bits at sgpr[0:1] + bool ImplicitBufferPtr = false; + + bool PrivateSegmentBuffer = false; + + bool DispatchPtr = false; + + bool QueuePtr = false; + + bool KernargSegmentPtr = false; + + bool DispatchID = false; + + bool FlatScratchInit = false; +}; + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -692,7 +692,7 @@ } bool NeedsFlatScratchInit = - MFI->hasFlatScratchInit() && + MFI->getUserSGPRInfo().hasFlatScratchInit() && (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() || (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch())); @@ -775,7 +775,7 @@ // Use relocations to get the pointer, and setup the other bits manually. 
uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - if (MFI->hasImplicitBufferPtr()) { + if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) { Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { @@ -814,7 +814,7 @@ BuildMI(MBB, I, DL, SMovB32, Rsrc1) .addExternalSymbol("SCRATCH_RSRC_DWORD1") .addReg(ScratchRsrcReg, RegState::ImplicitDefine); } BuildMI(MBB, I, DL, SMovB32, Rsrc2) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -2139,13 +2140,14 @@ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const { auto &ArgInfo = Info.getArgInfo(); + const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo(); // TODO: Unify handling with private memory pointers. - if (Info.hasDispatchPtr()) + if (UserSGPRInfo.hasDispatchPtr()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr); const Module *M = MF.getFunction().getParent(); - if (Info.hasQueuePtr() && + if (UserSGPRInfo.hasQueuePtr() && AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); @@ -2154,7 +2156,7 @@ if (Info.hasImplicitArgPtr()) allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr); - if (Info.hasDispatchID()) + if (UserSGPRInfo.hasDispatchID()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchID); // flat_scratch_init is not applicable for non-kernel functions. 
@@ -2177,34 +2179,35 @@ MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const { - if (Info.hasImplicitBufferPtr()) { + const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo(); + if (UserSGPRInfo.hasImplicitBufferPtr()) { Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(ImplicitBufferPtrReg); } // FIXME: How should these inputs interact with inreg / custom SGPR inputs? - if (Info.hasPrivateSegmentBuffer()) { + if (UserSGPRInfo.hasPrivateSegmentBuffer()) { Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI); MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass); CCInfo.AllocateReg(PrivateSegmentBufferReg); } - if (Info.hasDispatchPtr()) { + if (UserSGPRInfo.hasDispatchPtr()) { Register DispatchPtrReg = Info.addDispatchPtr(TRI); MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(DispatchPtrReg); } const Module *M = MF.getFunction().getParent(); - if (Info.hasQueuePtr() && + if (UserSGPRInfo.hasQueuePtr() && AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); } - if (Info.hasKernargSegmentPtr()) { + if (UserSGPRInfo.hasKernargSegmentPtr()) { MachineRegisterInfo &MRI = MF.getRegInfo(); Register InputPtrReg = Info.addKernargSegmentPtr(TRI); CCInfo.AllocateReg(InputPtrReg); @@ -2213,13 +2216,13 @@ MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); } - if (Info.hasDispatchID()) { + if (UserSGPRInfo.hasDispatchID()) { Register DispatchIDReg = Info.addDispatchID(TRI); MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(DispatchIDReg); } - if (Info.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) { + if (UserSGPRInfo.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) { Register FlatScratchInitReg = 
Info.addFlatScratchInit(TRI); MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(FlatScratchInitReg); @@ -2483,12 +2486,13 @@ bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv); if (IsGraphics) { - assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() && - !Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() && - !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && - !Info->hasWorkItemIDZ()); + const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo(); + assert(!UserSGPRInfo.hasDispatchPtr() && + !UserSGPRInfo.hasKernargSegmentPtr() && !Info->hasWorkGroupInfo() && + !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() && + !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); if (!Subtarget->enableFlatScratch()) - assert(!Info->hasFlatScratchInit()); + assert(!UserSGPRInfo.hasFlatScratchInit()); if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs()) assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ()); @@ -9147,7 +9151,7 @@ const SIMachineFunctionInfo &Info) { // TODO: Should check if the address can definitely not access stack. if (Info.isEntryFunction()) - return Info.hasFlatScratchInit(); + return Info.getUserSGPRInfo().hasFlatScratchInit(); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -16,6 +16,7 @@ #include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUMachineFunction.h" #include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" #include "SIModeRegisterDefaults.h" @@ -436,13 +437,9 @@ unsigned NumSpilledSGPRs = 0; unsigned NumSpilledVGPRs = 0; - // Feature bits required for inputs passed in user SGPRs. 
- bool PrivateSegmentBuffer : 1; - bool DispatchPtr : 1; - bool QueuePtr : 1; - bool KernargSegmentPtr : 1; - bool DispatchID : 1; - bool FlatScratchInit : 1; + // Tracks information about user SGPRs that will be setup by hardware which + // will apply to all wavefronts of the grid. + GCNUserSGPRUsageInfo UserSGPRInfo; // Feature bits required for inputs passed in system SGPRs. bool WorkGroupIDX : 1; // Always initialized. @@ -456,11 +453,6 @@ bool WorkItemIDY : 1; bool WorkItemIDZ : 1; - // Private memory buffer - // Compute directly in sgpr[0:1] - // Other shaders indirect 64-bits at sgpr[0:1] - bool ImplicitBufferPtr : 1; - // Pointer to where the ABI inserts special kernel arguments separate from the // user arguments. This is an offset from the KernargSegmentPtr. bool ImplicitArgPtr : 1; @@ -601,6 +593,8 @@ return PrologEpilogSGPRSpills; } + const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; } + void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI) { PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI)); @@ -780,6 +774,8 @@ return ArgInfo.WorkGroupInfo.getRegister(); } + bool hasLDSKernelId() const { return LDSKernelId; } + // Add special VGPR inputs void setWorkItemIDX(ArgDescriptor Arg) { ArgInfo.WorkItemIDX = Arg; @@ -804,30 +800,6 @@ ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); } - bool hasPrivateSegmentBuffer() const { - return PrivateSegmentBuffer; - } - - bool hasDispatchPtr() const { - return DispatchPtr; - } - - bool hasQueuePtr() const { - return QueuePtr; - } - - bool hasKernargSegmentPtr() const { - return KernargSegmentPtr; - } - - bool hasDispatchID() const { - return DispatchID; - } - - bool hasFlatScratchInit() const { - return FlatScratchInit; - } - bool hasWorkGroupIDX() const { return WorkGroupIDX; } @@ -844,8 +816,6 @@ return WorkGroupInfo; } - bool hasLDSKernelId() const { return LDSKernelId; } - bool hasPrivateSegmentWaveByteOffset() const { return 
PrivateSegmentWaveByteOffset; } @@ -866,10 +836,6 @@ return ImplicitArgPtr; } - bool hasImplicitBufferPtr() const { - return ImplicitBufferPtr; - } - AMDGPUFunctionArgInfo &getArgInfo() { return ArgInfo; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -7,17 +7,18 @@ //===----------------------------------------------------------------------===// #include "SIMachineFunctionInfo.h" -#include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" -#include "SIRegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -36,28 +37,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI) - : AMDGPUMachineFunction(F, *STI), - Mode(F), - GWSResourcePSV(getTM(STI)), - PrivateSegmentBuffer(false), - DispatchPtr(false), - QueuePtr(false), - KernargSegmentPtr(false), - DispatchID(false), - FlatScratchInit(false), - WorkGroupIDX(false), - WorkGroupIDY(false), - WorkGroupIDZ(false), - WorkGroupInfo(false), - LDSKernelId(false), - PrivateSegmentWaveByteOffset(false), - WorkItemIDX(false), - WorkItemIDY(false), - WorkItemIDZ(false), - ImplicitBufferPtr(false), - ImplicitArgPtr(false), - GITPtrHigh(0xffffffff), - HighBitsOf32BitAddress(0) { + : AMDGPUMachineFunction(F, *STI), Mode(F), GWSResourcePSV(getTM(STI)), + UserSGPRInfo(F, *STI), 
WorkGroupIDX(false), WorkGroupIDY(false), + WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false), + PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), + WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false), + GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) { const GCNSubtarget &ST = *static_cast(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); @@ -67,16 +52,10 @@ VRegFlags.reserve(1024); - // FIXME: Should have analysis or something rather than attribute to detect - // calls. - const bool HasCalls = F.hasFnAttribute("amdgpu-calls"); - const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL; if (IsKernel) { - if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0) - KernargSegmentPtr = true; WorkGroupIDX = true; WorkItemIDX = true; } else if (CC == CallingConv::AMDGPU_PS) { @@ -128,12 +107,6 @@ MayNeedAGPRs = false; // We will select all MAI with VGPR operands. } - bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); - if (isAmdHsaOrMesa && !ST.enableFlatScratch()) - PrivateSegmentBuffer = true; - else if (ST.isMesaGfxShader(F)) - ImplicitBufferPtr = true; - if (!AMDGPU::isGraphics(CC) || (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) { if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x")) @@ -158,33 +131,10 @@ ST.getMaxWorkitemID(F, 2) != 0) WorkItemIDZ = true; - if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr")) - DispatchPtr = true; - - if (!F.hasFnAttribute("amdgpu-no-queue-ptr")) - QueuePtr = true; - - if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) - DispatchID = true; - if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id")) LDSKernelId = true; } - // FIXME: This attribute is a hack, we just need an analysis on the function - // to look for allocas. - bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects"); - - // TODO: This could be refined a lot. 
The attribute is a poor way of - // detecting calls or stack objects that may require it before argument - // lowering. - if (ST.hasFlatAddressSpace() && isEntryFunction() && - (isAmdHsaOrMesa || ST.enableFlatScratch()) && - (HasCalls || HasStackObjects || ST.enableFlatScratch()) && - !ST.flatScratchIsArchitected()) { - FlatScratchInit = true; - } - if (isEntryFunction()) { // X, XY, and XYZ are the only supported combinations, so make sure Y is // enabled if Z is.