diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1067,15 +1067,15 @@ auto parseAndCheckArgument = [&](const Optional &A, const TargetRegisterClass &RC, - ArgDescriptor &Arg) { + ArgDescriptor &Arg, unsigned UserSGPRs, + unsigned SystemSGPRs) { // Skip parsing if it's not present. if (!A) return false; if (A->IsRegister) { unsigned Reg; - if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, - Error)) { + if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) { SourceRange = A->RegisterName.SourceRange; return true; } @@ -1088,60 +1088,62 @@ if (A->Mask) Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue()); + MFI->NumUserSGPRs += UserSGPRs; + MFI->NumSystemSGPRs += SystemSGPRs; return false; }; if (YamlMFI.ArgInfo && (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer, AMDGPU::SReg_128RegClass, - MFI->ArgInfo.PrivateSegmentBuffer) || + MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr, - AMDGPU::SReg_64RegClass, - MFI->ArgInfo.DispatchPtr) || + AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr, + 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.QueuePtr) || + MFI->ArgInfo.QueuePtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.KernargSegmentPtr) || + MFI->ArgInfo.KernargSegmentPtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID, - AMDGPU::SReg_64RegClass, - MFI->ArgInfo.DispatchID) || + AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID, + 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.FlatScratchInit) || + MFI->ArgInfo.FlatScratchInit, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.PrivateSegmentSize) || + MFI->ArgInfo.PrivateSegmentSize, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDX) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDY) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDZ) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupInfo) || + MFI->ArgInfo.WorkGroupInfo, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.PrivateSegmentWaveByteOffset) || + MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.ImplicitArgPtr) || + MFI->ArgInfo.ImplicitArgPtr, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.ImplicitBufferPtr) || + MFI->ArgInfo.ImplicitBufferPtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDX) || + MFI->ArgInfo.WorkItemIDX, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDY) || + MFI->ArgInfo.WorkItemIDY, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDZ))) + MFI->ArgInfo.WorkItemIDZ, 0, 0))) return true; MFI->Mode.IEEE = YamlMFI.Mode.IEEE; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -66,7 +66,7 @@ SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -311,7 +311,8 @@ } // Shift down registers reserved for the scratch wave offset. -unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +std::pair +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -322,17 +323,17 @@ // No replacement necessary. if (ScratchWaveOffsetReg == AMDGPU::NoRegister || (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { - return AMDGPU::NoRegister; + return std::make_pair(AMDGPU::NoRegister, false); } if (ST.hasSGPRInitBug()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -353,10 +354,11 @@ unsigned ReservedRegCount = 13; if (AllSGPRs.size() < ReservedRegCount) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); bool HandledScratchWaveOffsetReg = ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + bool FPAdjusted = false; for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. Be careful not to pick an alias of the @@ -374,12 +376,13 @@ MFI->setScratchWaveOffsetReg(Reg); MFI->setFrameOffsetReg(Reg); ScratchWaveOffsetReg = Reg; + FPAdjusted = true; break; } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, FPAdjusted); } void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, @@ -415,7 +418,9 @@ unsigned ScratchRsrcReg = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg = + unsigned ScratchWaveOffsetReg; + bool FPAdjusted; + std::tie(ScratchWaveOffsetReg, FPAdjusted) = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); // We need to insert initialization of the scratch resource descriptor. @@ -453,7 +458,7 @@ if (&OtherBB == &MBB) continue; - if (OffsetRegUsed) + if (OffsetRegUsed || FPAdjusted) OtherBB.addLiveIn(ScratchWaveOffsetReg); if (ResourceRegUsed) diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir @@ -0,0 +1,50 @@ +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s + + +# CHECK-LABEL: name: foo +# CHECK: BUFFER_STORE_DWORD_OFFSET +--- | + + define amdgpu_kernel void @foo() #0 { + ret void + } + + attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" } +... +--- +name: foo +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0' } + - { reg: '$sgpr4_sgpr5' } + - { reg: '$sgpr6_sgpr7' } + - { reg: '$sgpr8' } +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + explicitKernArgSize: 660 + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + scratchWaveOffsetReg: '$sgpr101' + frameOffsetReg: '$sgpr101' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + bb.0: + successors: %bb.1 + liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7 + + bb.1: + liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7 + + renamable $vgpr2 = IMPLICIT_DEF + SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)