Index: llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -310,10 +310,13 @@ const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); std::unique_ptr RS; + bool NewReservedRegs = false; + // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be // handled as SpilledToReg in regular PrologEpilogInserter. - if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || - SpillVGPRToAGPR) { + const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && + (HasCSRs || FuncInfo->hasSpilledSGPRs()); + if (HasSGPRSpillToVGPR || SpillVGPRToAGPR) { // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs // are spilled to VGPRs, in which case we can eliminate the stack usage. // @@ -338,6 +341,7 @@ TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg))) { + NewReservedRegs = true; if (!RS) RS.reset(new RegScavenger()); @@ -354,6 +358,7 @@ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { + NewReservedRegs = true; bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); (void)Spilled; assert(Spilled && "failed to spill SGPR to VGPR when allocated"); @@ -382,5 +387,9 @@ SaveBlocks.clear(); RestoreBlocks.clear(); + // Update the reserved registers with any VGPRs added for SGPR spills.
+ if (NewReservedRegs) + MRI.freezeReservedRegs(MF); + return MadeChange; } Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1150,7 +1150,6 @@ MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MBB->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - DenseSet SGPRSpillVGPRDefinedSet; // FIXME: This should be removed ArrayRef VGPRSpills = MFI->getSGPRToVGPRSpills(Index); @@ -1186,20 +1185,13 @@ bool UseKill = IsKill && i == NumSubRegs - 1; - // During SGPR spilling to VGPR, determine if the VGPR is defined. The - // only circumstance in which we say it is undefined is when it is the - // first spill to this VGPR in the first basic block. - bool VGPRDefined = true; - if (MBB == &MF->front()) - VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second; - // Mark the "old value of vgpr" input undef only if this is the first sgpr // spill to this specific vgpr in the first basic block. auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) .addReg(SubReg, getKillRegState(UseKill)) .addImm(Spill.Lane) - .addReg(Spill.VGPR, VGPRDefined ? 
0 : RegState::Undef); + .addReg(Spill.VGPR); if (i == 0 && NumSubRegs > 1) { // We may be spilling a super-register which is only partially defined, Index: llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir +++ llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir @@ -14,10 +14,10 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $exec_lo ; CHECK: $sgpr0 = S_MOV_B32 $exec_lo - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_lo @@ -38,10 +38,10 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $exec_hi ; CHECK: $sgpr0 = S_MOV_B32 $exec_hi - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_hi @@ -62,13 +62,13 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $exec ; CHECK: $sgpr0_sgpr1 = S_MOV_B64 $exec - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK: $vgpr0 
= V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 + ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec @@ -91,10 +91,10 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo @@ -113,10 +113,10 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK: 
$sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi @@ -135,13 +135,13 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 + ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec Index: llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir +++ llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir @@ -15,10 +15,10 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $m0 ; CHECK: $sgpr0 = S_MOV_B32 $m0 - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, 
implicit killed renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK: S_NOP 0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec @@ -43,10 +43,10 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0 - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK: S_NOP 0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec Index: llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir +++ llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir @@ -21,7 +21,7 @@ ; CHECK-LABEL: name: sgpr_spill_s64_undef_high32 ; CHECK: liveins: $sgpr4, $vgpr0 - ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) @@ -45,7 +45,7 @@ ; CHECK-LABEL: name: sgpr_spill_s64_undef_low32 ; CHECK: liveins: $sgpr5, $vgpr0 - ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def 
$sgpr4_sgpr5, implicit $sgpr4_sgpr5 ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) Index: llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -22,7 +22,7 @@ ; GCN-LABEL: name: spill_sgpr128_use_subreg ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN: renamable $sgpr1 = COPY $sgpr2 - ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -52,7 +52,7 @@ ; GCN-LABEL: name: spill_sgpr128_use_kill ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN: renamable $sgpr1 = COPY $sgpr2 - ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed 
$sgpr0_sgpr1_sgpr2_sgpr3 Index: llvm/test/CodeGen/AMDGPU/spill192.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/spill192.mir +++ llvm/test/CodeGen/AMDGPU/spill192.mir @@ -30,7 +30,7 @@ ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0 ; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 Index: llvm/test/CodeGen/AMDGPU/vgpr-spill.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/vgpr-spill.mir +++ llvm/test/CodeGen/AMDGPU/vgpr-spill.mir @@ -124,3 +124,25 @@ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) ... 
+ +--- +name: spill_v128_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 16, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: spill_v128_kill + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, addrspace 5) +...