diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -60,7 +60,7 @@ SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -165,7 +165,8 @@ } // Shift down registers reserved for the scratch wave offset. -unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +std::pair<unsigned, bool> +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -176,17 +177,17 @@ // No replacement necessary. 
if (ScratchWaveOffsetReg == AMDGPU::NoRegister || (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { - return AMDGPU::NoRegister; + return std::make_pair(AMDGPU::NoRegister, false); } if (ST.hasSGPRInitBug()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -207,10 +208,11 @@ unsigned ReservedRegCount = 13; if (AllSGPRs.size() < ReservedRegCount) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); bool HandledScratchWaveOffsetReg = ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + bool FPAdjusted = false; for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. Be careful not to pick an alias of the @@ -228,12 +230,13 @@ MFI->setScratchWaveOffsetReg(Reg); MFI->setFrameOffsetReg(Reg); ScratchWaveOffsetReg = Reg; + FPAdjusted = true; break; } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, FPAdjusted); } void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, @@ -269,7 +272,9 @@ unsigned ScratchRsrcReg = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg = + unsigned ScratchWaveOffsetReg; + bool FPAdjusted; + std::tie(ScratchWaveOffsetReg, FPAdjusted) = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); // We need to insert initialization of the scratch resource descriptor. 
@@ -307,7 +312,7 @@ if (&OtherBB == &MBB) continue; - if (OffsetRegUsed) + if (OffsetRegUsed || FPAdjusted) OtherBB.addLiveIn(ScratchWaveOffsetReg); if (ResourceRegUsed) diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir @@ -0,0 +1,59 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s + + +# CHECK-LABEL: name: foo +# CHECK: BUFFER_STORE_DWORD_OFFSET +--- | + + define amdgpu_kernel void @foo() #0 { + ret void + } + + attributes #0 = { nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="48" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" "unsafe-fp-math"="false" "use-soft-float"="false" } +... 
+--- +name: foo +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0' } + - { reg: '$sgpr4_sgpr5' } + - { reg: '$sgpr6_sgpr7' } + - { reg: '$sgpr8' } +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } + - { id: 1, type: spill-slot, size: 4, alignment: 4 } + - { id: 2, type: spill-slot, size: 4, alignment: 4 } + - { id: 3, type: spill-slot, size: 4, alignment: 4 } + - { id: 4, type: spill-slot, size: 4, alignment: 4 } + - { id: 5, type: spill-slot, size: 4, alignment: 4 } + - { id: 6, type: spill-slot, size: 4, alignment: 4 } + - { id: 7, type: spill-slot, size: 4, alignment: 4 } + - { id: 8, type: spill-slot, size: 4, alignment: 4 } + - { id: 9, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + explicitKernArgSize: 660 + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + scratchWaveOffsetReg: '$sgpr101' + frameOffsetReg: '$sgpr101' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + bb.0: + successors: %bb.1 + liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7 + + bb.1: + liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7 + + renamable $vgpr2 = IMPLICIT_DEF + SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)