diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1167,11 +1167,13 @@ if (SpillVGPRToAGPR) { // To track the spill frame indices handled in this pass. BitVector SpillFIs(MFI.getObjectIndexEnd(), false); + BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); bool SeenDbgInstr = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + int FrameIndex; if (MI.isDebugInstr()) SeenDbgInstr = true; @@ -1191,10 +1193,18 @@ SpillFIs.set(FI); continue; } - } + } else if (TII->isStoreToStackSlot(MI, FrameIndex) || + TII->isLoadFromStackSlot(MI, FrameIndex)) + NonVGPRSpillFIs.set(FrameIndex); } } + // Stack slot coloring may assign different objets to the same stack slot. + // If not, then the VGPR to AGPR spill slot is dead. + for (unsigned FI : SpillFIs.set_bits()) + if (!NonVGPRSpillFIs.test(FI)) + FuncInfo->setVGPRToAGPRSpillDead(FI); + for (MachineBasicBlock &MBB : MF) { for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) MBB.addLiveIn(Reg); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -465,6 +465,7 @@ struct VGPRSpillToAGPR { SmallVector Lanes; bool FullyAllocated = false; + bool IsDead = false; }; // Map WWM VGPR to a stack slot that is used to save/restore it in the @@ -546,6 +547,12 @@ : I->second.Lanes[Lane]; } + void setVGPRToAGPRSpillDead(int FrameIndex) { + auto I = VGPRToAGPRSpills.find(FrameIndex); + if (I != VGPRToAGPRSpills.end()) + I->second.IsDead = true; + } + bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -444,7 +444,7 @@ MFI.setStackID(i, TargetStackID::Default); for (auto &R : VGPRToAGPRSpills) { - if (R.second.FullyAllocated) + if (R.second.IsDead) MFI.RemoveStackObject(R.first); } } diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s + +--- +name: same_slot_agpr_sgpr +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: same_slot_agpr_sgpr + ; CHECK: liveins: $agpr0, $agpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5 + ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + $sgpr4_sgpr5 = IMPLICIT_DEF + SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... +--- +name: diff_slot_agpr_sgpr +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: diff_slot_agpr_sgpr + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + $sgpr4_sgpr5 = IMPLICIT_DEF + SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... +--- +name: dead_vgpr_slot +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: dead_vgpr_slot + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + S_ENDPGM 0 +...