diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -958,6 +958,42 @@ case AMDGPU::SI_SPILL_AV512_SAVE: case AMDGPU::SI_SPILL_AV512_RESTORE: return 16; + case AMDGPU::SI_SPILL_S384_SAVE: + case AMDGPU::SI_SPILL_S384_RESTORE: + case AMDGPU::SI_SPILL_V384_SAVE: + case AMDGPU::SI_SPILL_V384_RESTORE: + case AMDGPU::SI_SPILL_A384_SAVE: + case AMDGPU::SI_SPILL_A384_RESTORE: + case AMDGPU::SI_SPILL_AV384_SAVE: + case AMDGPU::SI_SPILL_AV384_RESTORE: + return 12; + case AMDGPU::SI_SPILL_S352_SAVE: + case AMDGPU::SI_SPILL_S352_RESTORE: + case AMDGPU::SI_SPILL_V352_SAVE: + case AMDGPU::SI_SPILL_V352_RESTORE: + case AMDGPU::SI_SPILL_A352_SAVE: + case AMDGPU::SI_SPILL_A352_RESTORE: + case AMDGPU::SI_SPILL_AV352_SAVE: + case AMDGPU::SI_SPILL_AV352_RESTORE: + return 11; + case AMDGPU::SI_SPILL_S320_SAVE: + case AMDGPU::SI_SPILL_S320_RESTORE: + case AMDGPU::SI_SPILL_V320_SAVE: + case AMDGPU::SI_SPILL_V320_RESTORE: + case AMDGPU::SI_SPILL_A320_SAVE: + case AMDGPU::SI_SPILL_A320_RESTORE: + case AMDGPU::SI_SPILL_AV320_SAVE: + case AMDGPU::SI_SPILL_AV320_RESTORE: + return 10; + case AMDGPU::SI_SPILL_S288_SAVE: + case AMDGPU::SI_SPILL_S288_RESTORE: + case AMDGPU::SI_SPILL_V288_SAVE: + case AMDGPU::SI_SPILL_V288_RESTORE: + case AMDGPU::SI_SPILL_A288_SAVE: + case AMDGPU::SI_SPILL_A288_RESTORE: + case AMDGPU::SI_SPILL_AV288_SAVE: + case AMDGPU::SI_SPILL_AV288_RESTORE: + return 9; case AMDGPU::SI_SPILL_S256_SAVE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_V256_SAVE: @@ -1936,6 +1972,10 @@ switch (MI->getOpcode()) { case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S384_SAVE: + case AMDGPU::SI_SPILL_S352_SAVE: + case AMDGPU::SI_SPILL_S320_SAVE: + case AMDGPU::SI_SPILL_S288_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: case AMDGPU::SI_SPILL_S224_SAVE: case AMDGPU::SI_SPILL_S192_SAVE: @@ -1947,6 +1987,10 @@ return spillSGPR(MI, FI, RS, Indexes, LIS, true); case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: + case AMDGPU::SI_SPILL_S384_RESTORE: + case AMDGPU::SI_SPILL_S352_RESTORE: + case AMDGPU::SI_SPILL_S320_RESTORE: + case AMDGPU::SI_SPILL_S288_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_S224_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE: @@ -1984,6 +2028,10 @@ // SGPR register spill case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S384_SAVE: + case AMDGPU::SI_SPILL_S352_SAVE: + case AMDGPU::SI_SPILL_S320_SAVE: + case AMDGPU::SI_SPILL_S288_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: case AMDGPU::SI_SPILL_S224_SAVE: case AMDGPU::SI_SPILL_S192_SAVE: @@ -1998,6 +2046,10 @@ // SGPR register restore case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: + case AMDGPU::SI_SPILL_S384_RESTORE: + case AMDGPU::SI_SPILL_S352_RESTORE: + case AMDGPU::SI_SPILL_S320_RESTORE: + case AMDGPU::SI_SPILL_S288_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_S224_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE: @@ -2012,6 +2064,10 @@ // VGPR register spill case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: + case AMDGPU::SI_SPILL_V384_SAVE: + case AMDGPU::SI_SPILL_V352_SAVE: + case AMDGPU::SI_SPILL_V320_SAVE: + case AMDGPU::SI_SPILL_V288_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V224_SAVE: case AMDGPU::SI_SPILL_V192_SAVE: @@ -2022,6 +2078,10 @@ case AMDGPU::SI_SPILL_V32_SAVE: case AMDGPU::SI_SPILL_A1024_SAVE: case AMDGPU::SI_SPILL_A512_SAVE: + case AMDGPU::SI_SPILL_A384_SAVE: + case AMDGPU::SI_SPILL_A352_SAVE: + case AMDGPU::SI_SPILL_A320_SAVE: + case AMDGPU::SI_SPILL_A288_SAVE: case AMDGPU::SI_SPILL_A256_SAVE: case AMDGPU::SI_SPILL_A224_SAVE: case AMDGPU::SI_SPILL_A192_SAVE: @@ -2032,6 +2092,10 @@ case AMDGPU::SI_SPILL_A32_SAVE: case AMDGPU::SI_SPILL_AV1024_SAVE: case AMDGPU::SI_SPILL_AV512_SAVE: + case AMDGPU::SI_SPILL_AV384_SAVE: + case AMDGPU::SI_SPILL_AV352_SAVE: + case AMDGPU::SI_SPILL_AV320_SAVE: + case AMDGPU::SI_SPILL_AV288_SAVE: case AMDGPU::SI_SPILL_AV256_SAVE: case AMDGPU::SI_SPILL_AV224_SAVE: case AMDGPU::SI_SPILL_AV192_SAVE: @@ -2064,6 +2128,10 @@ case AMDGPU::SI_SPILL_V192_RESTORE: case AMDGPU::SI_SPILL_V224_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: + case AMDGPU::SI_SPILL_V288_RESTORE: + case AMDGPU::SI_SPILL_V320_RESTORE: + case AMDGPU::SI_SPILL_V352_RESTORE: + case AMDGPU::SI_SPILL_V384_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: case AMDGPU::SI_SPILL_V1024_RESTORE: case AMDGPU::SI_SPILL_A32_RESTORE: @@ -2074,6 +2142,10 @@ case AMDGPU::SI_SPILL_A192_RESTORE: case AMDGPU::SI_SPILL_A224_RESTORE: case AMDGPU::SI_SPILL_A256_RESTORE: + case AMDGPU::SI_SPILL_A288_RESTORE: + case AMDGPU::SI_SPILL_A320_RESTORE: + case AMDGPU::SI_SPILL_A352_RESTORE: + case AMDGPU::SI_SPILL_A384_RESTORE: case AMDGPU::SI_SPILL_A512_RESTORE: case AMDGPU::SI_SPILL_A1024_RESTORE: case AMDGPU::SI_SPILL_AV32_RESTORE: @@ -2084,6 +2156,10 @@ case AMDGPU::SI_SPILL_AV192_RESTORE: case AMDGPU::SI_SPILL_AV224_RESTORE: case AMDGPU::SI_SPILL_AV256_RESTORE: + case AMDGPU::SI_SPILL_AV288_RESTORE: + case AMDGPU::SI_SPILL_AV320_RESTORE: + case AMDGPU::SI_SPILL_AV352_RESTORE: + case AMDGPU::SI_SPILL_AV384_RESTORE: case AMDGPU::SI_SPILL_AV512_RESTORE: case AMDGPU::SI_SPILL_AV1024_RESTORE: { const MachineOperand *VData = TII->getNamedOperand(*MI, diff --git a/llvm/test/CodeGen/AMDGPU/spill288.mir b/llvm/test/CodeGen/AMDGPU/spill288.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill288.mir @@ -0,0 +1,125 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s + +# Make sure spill/restore of 288 bit registers works. + +--- +name: spill_restore_sgpr288 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_sgpr288 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; SPILLED-NEXT: SI_SPILL_S288_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s288) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = SI_SPILL_S288_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s288) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-LABEL: name: spill_restore_sgpr288 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 8, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2 + ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3 + ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4 + ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5 + ; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6 + ; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7 + ; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8 + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + bb.0: + S_NOP 0, implicit-def %0:sgpr_288 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... + +--- +name: spill_restore_vgpr288 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_vgpr288 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 + ; SPILLED-NEXT: SI_SPILL_S288_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, %stack.0, $sgpr32, 0, implicit $exec :: (store (s288) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 + ; EXPANDED-LABEL: name: spill_restore_vgpr288 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 + ; EXPANDED-NEXT: SI_SPILL_S288_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, %stack.0, $sgpr32, 0, implicit $exec :: (store (s288) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 + bb.0: + S_NOP 0, implicit-def %0:vreg_288 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/spill320.mir b/llvm/test/CodeGen/AMDGPU/spill320.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill320.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s + +# Make sure spill/restore of 320 bit registers works. + +--- +name: spill_restore_sgpr320 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_sgpr320 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; SPILLED-NEXT: SI_SPILL_S320_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s320) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = SI_SPILL_S320_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s320) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-LABEL: name: spill_restore_sgpr320 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 8, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr13, 9, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2 + ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3 + ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4 + ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5 + ; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6 + ; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7 + ; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8 + ; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 9 + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + bb.0: + S_NOP 0, implicit-def %0:sgpr_320 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... + +--- +name: spill_restore_vgpr320 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_vgpr320 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SPILLED-NEXT: SI_SPILL_S320_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9, %stack.0, $sgpr32, 0, implicit $exec :: (store (s320) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; EXPANDED-LABEL: name: spill_restore_vgpr320 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; EXPANDED-NEXT: SI_SPILL_S320_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9, %stack.0, $sgpr32, 0, implicit $exec :: (store (s320) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + bb.0: + S_NOP 0, implicit-def %0:vreg_320 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/spill352.mir b/llvm/test/CodeGen/AMDGPU/spill352.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill352.mir @@ -0,0 +1,129 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s + +# Make sure spill/restore of 352 bit registers works. + +--- +name: spill_restore_sgpr352 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_sgpr352 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; SPILLED-NEXT: SI_SPILL_S352_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s352) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 = SI_SPILL_S352_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s352) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-LABEL: name: spill_restore_sgpr352 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 8, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 9, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr14, 10, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2 + ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3 + ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4 + ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5 + ; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6 + ; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7 + ; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8 + ; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 9 + ; EXPANDED-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 10 + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + bb.0: + S_NOP 0, implicit-def %0:sgpr_352 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... + +--- +name: spill_restore_vgpr352 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_vgpr352 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 + ; SPILLED-NEXT: SI_SPILL_S352_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10, %stack.0, $sgpr32, 0, implicit $exec :: (store (s352) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 + ; EXPANDED-LABEL: name: spill_restore_vgpr352 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 + ; EXPANDED-NEXT: SI_SPILL_S352_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10, %stack.0, $sgpr32, 0, implicit $exec :: (store (s352) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 + bb.0: + S_NOP 0, implicit-def %0:vreg_352 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/spill384.mir b/llvm/test/CodeGen/AMDGPU/spill384.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill384.mir @@ -0,0 +1,131 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s + +# Make sure spill/restore of 384 bit registers works. + +--- +name: spill_restore_sgpr384 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_sgpr384 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; SPILLED-NEXT: SI_SPILL_S384_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s384) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S384_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s384) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-LABEL: name: spill_restore_sgpr384 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 8, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 9, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 10, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr15, 11, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: liveins: $vgpr0 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2 + ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3 + ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4 + ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5 + ; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6 + ; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7 + ; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8 + ; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 9 + ; EXPANDED-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 10 + ; EXPANDED-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 11 + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + bb.0: + S_NOP 0, implicit-def %0:sgpr_384 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... + +--- +name: spill_restore_vgpr384 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_vgpr384 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; SPILLED-NEXT: SI_SPILL_S384_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, %stack.0, $sgpr32, 0, implicit $exec :: (store (s384) into %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; EXPANDED-LABEL: name: spill_restore_vgpr384 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; EXPANDED-NEXT: SI_SPILL_S384_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, %stack.0, $sgpr32, 0, implicit $exec :: (store (s384) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + bb.0: + S_NOP 0, implicit-def %0:vreg_384 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +...