Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -999,7 +999,7 @@ MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MBB->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - DenseSet SGPRSpillVGPRDefinedSet; + DenseSet SGPRSpillVGPRDefinedSet; // FIXME: This should be removed ArrayRef VGPRSpills = MFI->getSGPRToVGPRSpills(Index); @@ -1032,6 +1032,8 @@ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; + bool UseKill = IsKill && i == NumSubRegs - 1; + // During SGPR spilling to VGPR, determine if the VGPR is defined. The // only circumstance in which we say it is undefined is when it is the // first spill to this VGPR in the first basic block. @@ -1044,7 +1046,7 @@ auto MIB = BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), Spill.VGPR) - .addReg(SubReg, getKillRegState(IsKill)) + .addReg(SubReg, getKillRegState(UseKill)) .addImm(Spill.Lane) .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef); @@ -1054,6 +1056,9 @@ MIB.addReg(SuperReg, RegState::ImplicitDefine); } + if (NumSubRegs > 1) + MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit); + // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to // it are fixed. 
Index: llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -0,0 +1,119 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-lower-sgpr-spills,prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Make sure the initial $sgpr1 = COPY $sgpr2 is not deleted by copy +# propagation after the spill is lowered. + +--- +name: spill_sgpr128_use_subreg +tracksRegLiveness: true +machineFunctionInfo: + hasSpilledSGPRs: true + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GCN-LABEL: name: spill_sgpr128_use_subreg + ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GCN: renamable $sgpr1 = COPY $sgpr2 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1 + ; GCN: S_ENDPGM 0, implicit $sgpr8 + renamable $sgpr1 = COPY $sgpr2 + SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + renamable $sgpr8 = COPY killed renamable $sgpr1 + S_ENDPGM 0, implicit 
$sgpr8 +... + +--- +name: spill_sgpr128_use_kill +tracksRegLiveness: true +machineFunctionInfo: + hasSpilledSGPRs: true + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GCN-LABEL: name: spill_sgpr128_use_kill + ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GCN: renamable $sgpr1 = COPY $sgpr2 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: S_ENDPGM 0 + renamable $sgpr1 = COPY $sgpr2 + SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... 
+ +--- +name: spill_vgpr128_use_subreg +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; GCN-LABEL: name: spill_vgpr128_use_subreg + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GCN: renamable $vgpr1 = COPY $vgpr2 + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; GCN: renamable $vgpr8 = COPY $vgpr2 + ; GCN: S_ENDPGM 0, implicit $vgpr8 + renamable $vgpr1 = COPY $vgpr2 + SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + renamable $vgpr8 = COPY killed renamable $vgpr1 + S_ENDPGM 0, implicit $vgpr8 +... 
+ +--- +name: spill_vgpr128_use_kill +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; GCN-LABEL: name: spill_vgpr128_use_kill + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GCN: renamable $vgpr1 = COPY $vgpr2 + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; GCN: S_ENDPGM 0 + renamable $vgpr1 = COPY $vgpr2 + SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... 
Index: llvm/test/CodeGen/AMDGPU/spill192.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/spill192.mir +++ llvm/test/CodeGen/AMDGPU/spill192.mir @@ -30,12 +30,12 @@ ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0 ; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr4, 0, undef $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr5, 1, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr6, 2, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr7, 3, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr8, 4, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000)