diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -452,7 +452,7 @@ // instruction, the bundle will have been completely undone. if (BundledMI != BundleStart) { BundledMI->removeFromBundle(); - MBB.insert(FirstMI, BundledMI); + MBB.insert(BundleStart, BundledMI); } else if (BundledMI->isBundledWithSucc()) { BundledMI->unbundleFromSucc(); BundleStart = &*std::next(BundledMI->getIterator()); diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -238,3 +238,108 @@ S_BRANCH %bb.2 ... + +--- +name: splitkit_copy_unbundle_reorder +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; RA-LABEL: name: splitkit_copy_unbundle_reorder + ; RA: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; RA: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; RA: [[DEF2:%[0-9]+]]:sgpr_512 = IMPLICIT_DEF + ; RA: [[DEF2]].sub4:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub5:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub10:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub11:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub7:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub8:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub13:sgpr_512 = S_MOV_B32 -1 + ; RA: [[DEF2]].sub14:sgpr_512 = S_MOV_B32 -1 + ; RA: undef %15.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 { + ; RA: internal %15.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11 + ; RA: internal %15.sub7:sgpr_512 = COPY [[DEF2]].sub7 + ; RA: internal %15.sub8:sgpr_512 = COPY [[DEF2]].sub8 + ; RA: internal %15.sub13:sgpr_512 = COPY [[DEF2]].sub13 + ; RA: internal %15.sub14:sgpr_512 = COPY [[DEF2]].sub14 + ; RA: } + ; RA: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) + ; RA: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 + ; RA: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) + ; RA: undef %14.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { + ; RA: internal %14.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 + ; RA: internal %14.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 + ; RA: internal %14.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8 + ; RA: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 + ; RA: internal %14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 + ; RA: } + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0, 0 :: (dereferenceable invariant load 4) + ; RA: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]] + ; VR-LABEL: name: splitkit_copy_unbundle_reorder + ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF + ; VR: renamable $sgpr16 = S_MOV_B32 -1 + ; VR: renamable $sgpr17 = S_MOV_B32 -1 + ; VR: renamable $sgpr22 = S_MOV_B32 -1 + ; VR: renamable $sgpr23 = S_MOV_B32 -1 + ; VR: renamable $sgpr19 = S_MOV_B32 -1 + ; VR: renamable $sgpr20 = S_MOV_B32 -1 + ; VR: renamable $sgpr25 = S_MOV_B32 -1 + ; VR: renamable $sgpr26 = S_MOV_B32 -1 + ; VR: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) + ; VR: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 + ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) + ; VR: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17 + ; VR: renamable $sgpr15 = COPY killed renamable $sgpr19 + ; VR: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23 + ; VR: renamable $sgpr16 = COPY killed renamable $sgpr20 + ; VR: renamable $sgpr21 = COPY killed renamable $sgpr25 + ; VR: renamable $sgpr22 = COPY killed renamable $sgpr26 + ; VR: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF + ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr10_sgpr11 = IMPLICIT_DEF + ; VR: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0, 0 :: (dereferenceable invariant load 4) + ; VR: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr10_sgpr11, implicit killed renamable $sgpr8, implicit killed renamable $sgpr9, implicit killed renamable $sgpr12, implicit killed renamable $sgpr13, implicit killed renamable $sgpr14, implicit killed renamable $sgpr15, implicit killed renamable $sgpr16, implicit killed renamable $sgpr17 + %0:sgpr_128 = IMPLICIT_DEF + %1:sreg_64 = IMPLICIT_DEF + %2:sgpr_512 = IMPLICIT_DEF + + %2.sub4:sgpr_512 = S_MOV_B32 -1 + %2.sub5:sgpr_512 = S_MOV_B32 -1 + %2.sub10:sgpr_512 = S_MOV_B32 -1 + %2.sub11:sgpr_512 = S_MOV_B32 -1 + %2.sub7:sgpr_512 = S_MOV_B32 -1 + %2.sub8:sgpr_512 = S_MOV_B32 -1 + %2.sub13:sgpr_512 = S_MOV_B32 -1 + %2.sub14:sgpr_512 = S_MOV_B32 -1 + + ; Clobber registers + S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 + + %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0, 0 :: (dereferenceable invariant load 4) + + S_NOP 0, implicit %0, implicit %1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12 + +...